Ejemplo n.º 1
        /// <summary>
        /// Chooses one action per entry of <paramref name="contexts"/> through an explorer built by
        /// <c>MwtExplorer.Create</c> and checks what was chosen and recorded.
        /// </summary>
        /// <remarks>
        /// Asserts that every action was chosen at least once, that exactly one interaction was
        /// recorded per context, and that the recorded context Ids equal their position in the array.
        /// </remarks>
        /// <typeparam name="TContext">Context type; must derive from <c>RegularTestContext</c>.</typeparam>
        /// <param name="numActions">Number of actions passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="explorer">Exploration algorithm passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="scorer">Context mapper passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="contexts">Contexts to explore, in order.</param>
        private static void SoftmaxWithContext<TContext>(int numActions, IExplorer<int, float[]> explorer, IContextMapper<TContext, float[]> scorer, TContext[] contexts)
            where TContext : RegularTestContext
        {
            var recorder = new TestRecorder<TContext>();
            var mwtt     = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

            // Hit counter per action; slot 0 belongs to action id 1.
            uint[] actions = new uint[numActions];

            Random rand = new Random();

            for (uint i = 0; i < contexts.Length; i++)
            {
                var uniqueId     = rand.NextDouble().ToString();
                int chosenAction = mwtt.ChooseAction(uniqueId, contexts[i]);
                actions[chosenAction - 1]++; // action id is one-based
            }

            for (uint i = 0; i < numActions; i++)
            {
                Assert.IsTrue(actions[i] > 0);
            }

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(contexts.Length, interactions.Count);

            for (int i = 0; i < contexts.Length; i++)
            {
                Assert.AreEqual(i, interactions[i].Context.Id);
            }
        }
Ejemplo n.º 2
        /// <summary>
        /// Checks that exploring with <paramref name="explorer"/> returns the action picked by the
        /// first policy for both test contexts and records one interaction for each of the first
        /// two decisions.
        /// </summary>
        /// <typeparam name="TContext">Context type; must derive from <c>TestContext</c>.</typeparam>
        /// <param name="numActions">Number of actions; not read by this helper.</param>
        /// <param name="testContext1">Context used for the first decision and for the repeated check.</param>
        /// <param name="testContext2">Context used for the second decision.</param>
        /// <param name="policies">Policies under test; only <c>policies[0]</c> is consulted for the expected action.</param>
        /// <param name="explorer">Exploration algorithm handed to every <c>ChooseAction</c> call.</param>
        private static void BootstrapWithContext<TContext>(uint numActions, TContext testContext1, TContext testContext2, TestPolicy<TContext>[] policies, IExplorer<TContext> explorer)
            where TContext : TestContext
        {
            string uniqueKey = "ManagedTestId";

            var recorder = new TestRecorder<TContext>();
            var mwtt     = new MwtExplorer<TContext>("mwt", recorder);

            uint expectedAction = policies[0].ChooseAction(testContext1);

            uint chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext1);
            Assert.AreEqual(expectedAction, chosenAction);

            chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext2);
            Assert.AreEqual(expectedAction, chosenAction);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(2, interactions.Count);
            Assert.AreEqual(testContext1.Id, interactions[0].Context.Id);
            Assert.AreEqual(testContext2.Id, interactions[1].Context.Id);

            // Verify that policy action is chosen all the time
            for (int i = 0; i < 1000; i++)
            {
                chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext1);
                Assert.AreEqual(expectedAction, chosenAction);
            }
        }
Ejemplo n.º 3
        /// <summary>
        /// Times three phases of an epsilon-greedy exploration round (object construction,
        /// <c>ChooseAction</c>, and <c>GetRecording</c>) and prints the averages to the console.
        /// </summary>
        /// <remarks>
        /// The loop runs <c>numIter + numWarmup</c> times; the first <c>numWarmup</c> iterations do the
        /// same work but add nothing to the totals. Totals are accumulated in milliseconds and
        /// multiplied by 1000 when printed as "micro".
        /// </remarks>
        public static void Clock()
        {
            float  epsilon         = .2f;
            string uniqueKey       = "clock";
            int    numFeatures     = 1000;
            int    numIter         = 1000;
            int    numWarmup       = 100;
            int    numInteractions = 1;
            uint   numActions      = 10;

            double timeInit = 0, timeChoose = 0, timeSerializedLog = 0;

            System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
            for (int iter = 0; iter < numIter + numWarmup; iter++)
            {
                bool isWarmup = iter < numWarmup;

                // The stopwatch must be (re)started before every timed phase; a Stopwatch that
                // was never started reports an elapsed time of zero.
                watch.Restart();

                StringRecorder<SimpleContext> recorder = new StringRecorder<SimpleContext>();
                StringPolicy policy = new StringPolicy();
                MwtExplorer<SimpleContext>           mwt      = new MwtExplorer<SimpleContext>("mwt", recorder);
                EpsilonGreedyExplorer<SimpleContext> explorer = new EpsilonGreedyExplorer<SimpleContext>(policy, epsilon, numActions);

                timeInit += isWarmup ? 0 : watch.Elapsed.TotalMilliseconds;

                Feature[] f = new Feature[numFeatures];
                for (int i = 0; i < numFeatures; i++)
                {
                    f[i].Id    = (uint)i + 1;
                    f[i].Value = 0.5f;
                }

                SimpleContext context = new SimpleContext(f);

                watch.Restart();

                for (int i = 0; i < numInteractions; i++)
                {
                    mwt.ChooseAction(explorer, uniqueKey, context);
                }

                timeChoose += isWarmup ? 0 : watch.Elapsed.TotalMilliseconds;

                watch.Restart();

                // The returned string is not inspected; only the cost of producing it is measured.
                string interactions = recorder.GetRecording();

                timeSerializedLog += isWarmup ? 0 : watch.Elapsed.TotalMilliseconds;

                // Extra, untimed round of decisions after the recording has been read.
                for (int i = 0; i < numInteractions; i++)
                {
                    mwt.ChooseAction(explorer, uniqueKey, context);
                }
            }

            Console.WriteLine("--- PER ITERATION ---");
            Console.WriteLine("# iterations: {0}, # interactions: {1}, # context features {2}", numIter, numInteractions, numFeatures);
            Console.WriteLine("Init: {0} micro", timeInit * 1000 / numIter);
            Console.WriteLine("Choose Action: {0} micro", timeChoose * 1000 / (numIter * numInteractions));
            Console.WriteLine("Get Serialized Log: {0} micro", timeSerializedLog * 1000 / numIter);
            Console.WriteLine("--- TOTAL TIME: {0} micro", (timeInit + timeChoose + timeSerializedLog) * 1000);
        }
Ejemplo n.º 4
        /// <summary>
        /// Releases the components held by this instance: disposes the recorder when it implements
        /// <see cref="IDisposable"/> and clears the downloader, recorder and explorer references.
        /// </summary>
        public void Dispose()
        {
            // NOTE(review): the downloaders and the explorer are only dereferenced here; if their
            // types own resources they may need an explicit Dispose as well — confirm.
            if (this.settingsDownloader != null)
            {
                this.settingsDownloader = null;
            }

            if (this.modelDownloader != null)
            {
                this.modelDownloader = null;
            }

            if (this.recorder != null)
            {
                // Flush any pending data to be logged
                var disposable = this.recorder as IDisposable;
                if (disposable != null)
                {
                    disposable.Dispose();
                }

                this.recorder = null;
            }

            if (this.mwtExplorer != null)
            {
                this.mwtExplorer = null;
            }
        }
Ejemplo n.º 5
        /// <summary>
        /// Checks that an explorer built by <c>MwtExplorer.Create</c> from <paramref name="explorer"/>
        /// and <paramref name="policy"/> always returns the action the policy maps the context to.
        /// </summary>
        /// <remarks>
        /// Sets <c>testContext.Id</c> to 100, asserts that two interactions were recorded after the
        /// first two decisions, then repeats the decision 1000 times.
        /// </remarks>
        /// <typeparam name="TContext">Context type; must derive from <c>RegularTestContext</c>.</typeparam>
        /// <param name="numActions">Number of actions passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="testContext">Context used for every decision; its Id is overwritten.</param>
        /// <param name="policy">Policy whose <c>MapContext</c> result defines the expected action.</param>
        /// <param name="explorer">Exploration algorithm passed to <c>MwtExplorer.Create</c>.</param>
        private static void EpsilonGreedyWithContext<TContext>(int numActions, TContext testContext, TestPolicy<TContext> policy, IExplorer<int, int> explorer)
            where TContext : RegularTestContext
        {
            string uniqueId = "ManagedTestId";

            TestRecorder<TContext> recorder = new TestRecorder<TContext>();
            var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, policy);

            testContext.Id = 100;

            int expectedAction = policy.MapContext(testContext).Value;

            int chosenAction = mwtt.ChooseAction(uniqueId, testContext);
            Assert.AreEqual(expectedAction, chosenAction);

            chosenAction = mwtt.ChooseAction(uniqueId, testContext);
            Assert.AreEqual(expectedAction, chosenAction);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(2, interactions.Count);
            Assert.AreEqual(testContext.Id, interactions[0].Context.Id);

            // Verify that policy action is chosen all the time
            for (int i = 0; i < 1000; i++)
            {
                chosenAction = mwtt.ChooseAction(uniqueId, testContext);
                Assert.AreEqual(expectedAction, chosenAction);
            }
        }
Ejemplo n.º 6
        /// <summary>
        /// With epsilon set to 0, checks that two epsilon-greedy decisions both return the policy's
        /// action and that two interactions carrying the context Id are recorded.
        /// </summary>
        public void EpsilonGreedy()
        {
            uint   numActions = 10;
            float  epsilon    = 0f;
            string uniqueKey  = "ManagedTestId";

            TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
            TestPolicy policy = new TestPolicy();
            MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
            TestContext testContext       = new TestContext();

            testContext.Id = 100;

            var explorer = new EpsilonGreedyExplorer<TestContext>(policy, epsilon, numActions);

            uint expectedAction = policy.ChooseAction(testContext);

            uint chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext);
            Assert.AreEqual(expectedAction, chosenAction);

            chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext);
            Assert.AreEqual(expectedAction, chosenAction);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(2, interactions.Count);
            Assert.AreEqual(testContext.Id, interactions[0].Context.Id);
        }
Ejemplo n.º 7
        /// <summary>
        /// Exercises five explorer kinds (epsilon-greedy, tau-first, bootstrap, softmax, generic)
        /// and expects every <c>ChooseAction</c> call to throw an <see cref="ArgumentException"/>
        /// whose <c>ParamName</c> is "ctx" (compared case-insensitively).
        /// </summary>
        public void UsageBadVariableActionContext()
        {
            int numExceptionsCaught   = 0;
            int numExceptionsExpected = 5;

            // Runs the scenario and counts it only when it fails with the expected parameter name.
            var tryCatchArgumentException = (Action<Action>)((action) =>
            {
                try
                {
                    action();
                }
                catch (ArgumentException ex)
                {
                    // Ordinal comparison: culture-independent and safe when ParamName is null.
                    if (string.Equals(ex.ParamName, "ctx", StringComparison.OrdinalIgnoreCase))
                    {
                        numExceptionsCaught++;
                    }
                }
            });

            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
                var policy   = new TestPolicy<TestContext>();
                var explorer = new EpsilonGreedyExplorer<TestContext>(policy, 0.2f);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
                var policy   = new TestPolicy<TestContext>();
                var explorer = new TauFirstExplorer<TestContext>(policy, 10);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
                var policies = new TestPolicy<TestContext>[2];
                for (int i = 0; i < 2; i++)
                {
                    policies[i] = new TestPolicy<TestContext>(i * 2);
                }
                var explorer = new BootstrapExplorer<TestContext>(policies);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
                var scorer   = new TestScorer<TestContext>(10);
                var explorer = new SoftmaxExplorer<TestContext>(scorer, 0.5f);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
                var scorer   = new TestScorer<TestContext>(10);
                var explorer = new GenericExplorer<TestContext>(scorer);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });

            Assert.AreEqual(numExceptionsExpected, numExceptionsCaught);
        }
Ejemplo n.º 8
        /// <summary>
        /// With tau set to 0, checks that a tau-first decision returns the policy's action and that
        /// no interaction is recorded.
        /// </summary>
        public void TauFirst()
        {
            uint   numActions = 10;
            uint   tau        = 0;
            string uniqueKey  = "ManagedTestId";

            TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
            TestPolicy policy = new TestPolicy();
            MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
            TestContext testContext       = new TestContext()
            {
                Id = 100
            };

            var explorer = new TauFirstExplorer<TestContext>(policy, tau, numActions);

            uint expectedAction = policy.ChooseAction(testContext);

            uint chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext);
            Assert.AreEqual(expectedAction, chosenAction);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(0, interactions.Count);
        }
Ejemplo n.º 9
        /// <summary>
        /// Runs 1000 decisions over randomly sized contexts, converts the recorded interactions to
        /// <c>Interaction</c> objects, reports a reward for each one and evaluates a policy through
        /// <c>MwtOptimizer</c>.
        /// </summary>
        /// <param name="mwtt">Explorer used to choose actions.</param>
        /// <param name="explorer">Exploration algorithm handed to every <c>ChooseAction</c> call.</param>
        /// <param name="recorder">Recorder from which the interactions are read back after the decisions.</param>
        private void EndToEnd(MwtExplorer<SimpleContext> mwtt, IExplorer<SimpleContext> explorer, TestRecorder<SimpleContext> recorder)
        {
            uint numActions = 10;

            Random rand = new Random();

            List<float> rewards = new List<float>();

            for (int i = 0; i < 1000; i++)
            {
                // Between 800 and 1200 features, ids starting at 1, random values.
                Feature[] f = new Feature[rand.Next(800, 1201)];
                for (int j = 0; j < f.Length; j++)
                {
                    f[j].Id    = (uint)(j + 1);
                    f[j].Value = (float)rand.NextDouble();
                }
                SimpleContext c = new SimpleContext(f);

                mwtt.ChooseAction(explorer, i.ToString(), c);

                // One reward per decision, so rewards[i] below is in range for every
                // recorded interaction.
                rewards.Add((float)rand.NextDouble());
            }

            var testInteractions = recorder.GetAllInteractions();

            Interaction[] partialInteractions = new Interaction[testInteractions.Count];
            for (int i = 0; i < testInteractions.Count; i++)
            {
                partialInteractions[i] = new Interaction()
                {
                    ApplicationContext = new OldSimpleContext(testInteractions[i].Context.GetFeatures(), null),
                    ChosenAction       = testInteractions[i].Action,
                    Probability        = testInteractions[i].Probability,
                    Id = testInteractions[i].UniqueKey
                };
            }

            MwtRewardReporter mrr = new MwtRewardReporter(partialInteractions);

            for (int i = 0; i < partialInteractions.Length; i++)
            {
                Assert.AreEqual(true, mrr.ReportReward(partialInteractions[i].GetId(), rewards[i]));
            }

            Interaction[] completeInteractions = mrr.GetAllInteractions();
            MwtOptimizer  mop = new MwtOptimizer(completeInteractions, numActions);

            string modelFile = "model";

            // NOTE(review): nothing in this method creates the "model" file before it is
            // evaluated, and the evaluated value is not asserted on — confirm whether a
            // training step and a final check are expected here.
            float evaluatedValue = mop.EvaluatePolicyVWCSOAA(modelFile);
        }


Ejemplo n.º 10
        /// <summary>
        /// Runs the shared <c>EndToEnd</c> scenario with a <c>GenericExplorer</c> over 10 actions.
        /// </summary>
        public void EndToEndGeneric()
        {
            uint numActions = 10;
            TestRecorder<SimpleContext> recorder = new TestRecorder<SimpleContext>();
            TestScorer<SimpleContext>   scorer   = new TestScorer<SimpleContext>(numActions);

            MwtExplorer<SimpleContext> mwtt = new MwtExplorer<SimpleContext>("mwt", recorder);
            var explorer = new GenericExplorer<SimpleContext>(scorer, numActions);

            EndToEnd(mwtt, explorer, recorder);
        }
Ejemplo n.º 11
        /// <summary>
        /// Runs the shared <c>EndToEnd</c> scenario with a <c>SoftmaxExplorer</c> (lambda 0.5)
        /// over 10 actions.
        /// </summary>
        public void EndToEndSoftmax()
        {
            uint  numActions = 10;
            float lambda     = 0.5f;
            TestRecorder<SimpleContext> recorder = new TestRecorder<SimpleContext>();
            TestScorer<SimpleContext>   scorer   = new TestScorer<SimpleContext>(numActions);

            MwtExplorer<SimpleContext> mwtt = new MwtExplorer<SimpleContext>("mwt", recorder);
            var explorer = new SoftmaxExplorer<SimpleContext>(scorer, lambda, numActions);

            EndToEnd(mwtt, explorer, recorder);
        }
Ejemplo n.º 12
        /// <summary>
        /// Runs the shared <c>EndToEnd</c> scenario with an <c>EpsilonGreedyExplorer</c>
        /// (epsilon 0.5) over 10 actions.
        /// </summary>
        public void EndToEndEpsilonGreedy()
        {
            uint  numActions = 10;
            float epsilon    = 0.5f;

            TestRecorder<SimpleContext> recorder = new TestRecorder<SimpleContext>();
            MwtExplorer<SimpleContext>  mwtt     = new MwtExplorer<SimpleContext>("mwt", recorder);

            TestSimplePolicy policy = new TestSimplePolicy();
            var explorer            = new EpsilonGreedyExplorer<SimpleContext>(policy, epsilon, numActions);

            EndToEnd(mwtt, explorer, recorder);
        }
Ejemplo n.º 13
        /// <summary>
        /// Runs the shared <c>EndToEnd</c> scenario with a <c>TauFirstExplorer</c> (tau 5)
        /// over 10 actions.
        /// </summary>
        public void EndToEndTauFirst()
        {
            uint numActions = 10;
            uint tau        = 5;

            TestRecorder<SimpleContext> recorder = new TestRecorder<SimpleContext>();
            TestSimplePolicy            policy   = new TestSimplePolicy();
            MwtExplorer<SimpleContext>  mwtt     = new MwtExplorer<SimpleContext>("mwt", recorder);

            var explorer = new TauFirstExplorer<SimpleContext>(policy, tau, numActions);

            EndToEnd(mwtt, explorer, recorder);
        }
Ejemplo n.º 14
        /// <summary>
        /// Makes a single decision with <paramref name="explorer"/> and checks that exactly one
        /// interaction carrying the context's Id was recorded.
        /// </summary>
        /// <typeparam name="TContext">Context type; must derive from <c>TestContext</c>.</typeparam>
        /// <param name="numActions">Number of actions; not read by this helper.</param>
        /// <param name="testContext">Context used for the decision.</param>
        /// <param name="explorer">Exploration algorithm handed to <c>ChooseAction</c>.</param>
        private static void GenericWithContext<TContext>(uint numActions, TContext testContext, IExplorer<TContext> explorer)
            where TContext : TestContext
        {
            string uniqueKey = "ManagedTestId";
            var    recorder  = new TestRecorder<TContext>();

            var mwtt = new MwtExplorer<TContext>("mwt", recorder);

            // Only the recorded interaction is checked; the chosen action itself is not.
            mwtt.ChooseAction(explorer, uniqueKey, testContext);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(1, interactions.Count);
            Assert.AreEqual(testContext.Id, interactions[0].Context.Id);
        }
Ejemplo n.º 15
        /// <summary>
        /// Makes a single decision through an explorer built by <c>MwtExplorer.Create</c> and checks
        /// that exactly one interaction carrying the context's Id was recorded.
        /// </summary>
        /// <typeparam name="TContext">Context type; must derive from <c>RegularTestContext</c>.</typeparam>
        /// <param name="numActions">Number of actions passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="testContext">Context used for the decision.</param>
        /// <param name="explorer">Exploration algorithm passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="scorer">Context mapper passed to <c>MwtExplorer.Create</c>.</param>
        private static void GenericWithContext<TContext>(int numActions, TContext testContext, IExplorer<int, float[]> explorer, IContextMapper<TContext, float[]> scorer)
            where TContext : RegularTestContext
        {
            string uniqueKey = "ManagedTestId";
            var    recorder  = new TestRecorder<TContext>();

            var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

            // Only the recorded interaction is checked; the chosen action itself is not.
            mwtt.ChooseAction(uniqueKey, testContext);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(1, interactions.Count);
            Assert.AreEqual(testContext.Id, interactions[0].Context.Id);
        }
Ejemplo n.º 16
        /// <summary>
        /// Runs the shared <c>EndToEnd</c> scenario with a <c>BaggingExplorer</c> built from two
        /// <c>TestSimplePolicy</c> instances over 10 actions.
        /// </summary>
        public void EndToEndBagging()
        {
            uint numActions = 10;
            uint numbags    = 2;

            TestRecorder<SimpleContext> recorder = new TestRecorder<SimpleContext>();

            TestSimplePolicy[] policies = new TestSimplePolicy[numbags];
            for (int i = 0; i < numbags; i++)
            {
                policies[i] = new TestSimplePolicy();
            }

            MwtExplorer<SimpleContext> mwtt = new MwtExplorer<SimpleContext>("mwt", recorder);
            var explorer = new BaggingExplorer<SimpleContext>(policies, numbags, numActions);

            EndToEnd(mwtt, explorer, recorder);
        }
Ejemplo n.º 17
        /// <summary>
        /// Runs tau-first exploration over a list of contexts and appends the recorder's string
        /// output to <paramref name="outputFile"/>.
        /// </summary>
        /// <remarks>
        /// Only policy type 0 (a fixed policy whose action is read from
        /// <c>configPolicy["Action"]</c>) is handled; any other value does nothing.
        /// </remarks>
        /// <typeparam name="TContext">Context type; when it implements <c>IVariableActionContext</c>
        /// the explorer is created with a <c>VariableActionProvider</c> instead of a fixed action count.</typeparam>
        /// <param name="appId">Application id passed to <c>MwtExplorer.Create</c>.</param>
        /// <param name="policyType">Selects the policy kind; only 0 is handled.</param>
        /// <param name="configPolicy">Policy configuration; its "Action" entry is read for policy type 0.</param>
        /// <param name="tau">Value passed to the <c>TauFirstExplorer</c> constructor.</param>
        /// <param name="numActions">Fixed action count, used when the context type is not variable-action.</param>
        /// <param name="experimentalUnitIdList">One unique id per decision.</param>
        /// <param name="contextList">Contexts, indexed in step with <paramref name="experimentalUnitIdList"/>.</param>
        /// <param name="outputFile">File the recording is appended to.</param>
        static void ExploreTauFirst<TContext>
        (
            string appId,
            int policyType,
            JToken configPolicy,
            int tau,
            int numActions,
            string[] experimentalUnitIdList,
            TContext[] contextList,
            string outputFile
        )
        {
            var recorder = new StringRecorder<TContext>();

            bool isVariableActionContext = typeof(IVariableActionContext).IsAssignableFrom(typeof(TContext));

            switch (policyType)
            {
                case 0:     // fixed policy
                {
                    var policyAction = configPolicy["Action"].Value<uint>();

                    var policy = new TestPolicy<TContext>
                    {
                        ActionToChoose = policyAction
                    };

                    var explorer = new TauFirstExplorer(tau);

                    var mwt = isVariableActionContext ?
                              MwtExplorer.Create(appId, new VariableActionProvider<TContext>(), recorder, explorer, policy) :
                              MwtExplorer.Create(appId, numActions, recorder, explorer, policy);

                    for (int i = 0; i < experimentalUnitIdList.Length; i++)
                    {
                        // NOTE(review): this value is computed but never read; kept as-is in case
                        // GetNumberOfActions() is called for its effects — confirm and remove if not.
                        int numActionsVariable = isVariableActionContext ? ((IVariableActionContext)contextList[i]).GetNumberOfActions() : int.MaxValue;
                        mwt.ChooseAction(experimentalUnitIdList[i], contextList[i]);
                    }

                    File.AppendAllText(outputFile, recorder.GetRecording());

                    break;
                }
            }
        }

Ejemplo n.º 18
        /// <summary>
        /// Makes a computed number of softmax decisions and checks that every action is chosen at
        /// least once and that each decision is recorded in order.
        /// </summary>
        /// <remarks>
        /// The decision count is <c>n*ln(n) + ln(cover/n)*C*n</c> truncated to an unsigned integer,
        /// with n = 10 actions, cover = 100 and C = 5. Each context's Id is its decision index.
        /// </remarks>
        public void Softmax()
        {
            uint  numActions      = 10;
            float lambda          = 0.5f;
            uint  numActionsCover = 100;
            float C = 5;

            TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
            TestScorer<TestContext>   scorer   = new TestScorer<TestContext>(numActions);

            MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
            var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

            uint numDecisions = (uint)(numActions * Math.Log(numActions * 1.0) + Math.Log(numActionsCover * 1.0 / numActions) * C * numActions);

            // Hit counter per action; slot 0 belongs to action id 1.
            uint[] actions = new uint[numActions];

            Random rand = new Random();

            for (uint i = 0; i < numDecisions; i++)
            {
                uint chosenAction = mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
                {
                    Id = (int)i
                });
                actions[chosenAction - 1]++; // action id is one-based
            }

            for (uint i = 0; i < numActions; i++)
            {
                Assert.IsTrue(actions[i] > 0);
            }

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(numDecisions, (uint)interactions.Count);

            for (int i = 0; i < numDecisions; i++)
            {
                Assert.AreEqual(i, interactions[i].Context.Id);
            }
        }
Ejemplo n.º 19
        /// <summary>
        /// Checks that two bootstrap decisions (contexts with Ids 99 and 100) both return the first
        /// policy's action and that both interactions are recorded in order.
        /// </summary>
        public void Bootstrap()
        {
            uint   numActions = 10;
            uint   numbags    = 2;
            string uniqueKey  = "ManagedTestId";

            TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();

            TestPolicy[] policies = new TestPolicy[numbags];
            for (int i = 0; i < numbags; i++)
            {
                policies[i] = new TestPolicy(i * 2);
            }

            TestContext testContext1 = new TestContext()
            {
                Id = 99
            };
            TestContext testContext2 = new TestContext()
            {
                Id = 100
            };

            MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
            var explorer = new BootstrapExplorer<TestContext>(policies, numActions);

            uint expectedAction = policies[0].ChooseAction(testContext1);

            uint chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext1);
            Assert.AreEqual(expectedAction, chosenAction);

            chosenAction = mwtt.ChooseAction(explorer, uniqueKey, testContext2);
            Assert.AreEqual(expectedAction, chosenAction);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(2, interactions.Count);
            Assert.AreEqual(testContext1.Id, interactions[0].Context.Id);
            Assert.AreEqual(testContext2.Id, interactions[1].Context.Id);
        }
Ejemplo n.º 20
        /// <summary>
        /// Makes a single decision with a <c>GenericExplorer</c> and checks that exactly one
        /// interaction carrying the context Id (100) is recorded.
        /// </summary>
        public void Generic()
        {
            uint   numActions = 10;
            string uniqueKey  = "ManagedTestId";
            TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
            TestScorer<TestContext>   scorer   = new TestScorer<TestContext>(numActions);

            MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
            var explorer = new GenericExplorer<TestContext>(scorer, numActions);

            TestContext testContext = new TestContext()
            {
                Id = 100
            };

            // Only the recorded interaction is checked; the chosen action itself is not.
            mwtt.ChooseAction(explorer, uniqueKey, testContext);

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(1, interactions.Count);
            Assert.AreEqual(testContext.Id, interactions[0].Context.Id);
        }
Ejemplo n.º 21
        /// <summary>
        /// Disposes resources.
        /// </summary>
        /// <param name="disposing">
        /// When <c>true</c>, the managed components are released: the recorder is disposed if it
        /// implements <see cref="IDisposable"/>, and the downloader, recorder and explorer
        /// references are cleared. When <c>false</c>, nothing is done.
        /// </param>
        public virtual void Dispose(bool disposing)
        {
            // Always free unmanaged objects, but conditionally free managed objects if this is being
            // called from Dispose() (as opposed to a finalizer, currently not implemented)
            if (disposing)
            {
                // NOTE(review): the downloaders and the explorer are only dereferenced here; if their
                // types own resources they may need an explicit Dispose as well — confirm.
                if (this.settingsDownloader != null)
                {
                    this.settingsDownloader = null;
                }

                if (this.modelDownloader != null)
                {
                    this.modelDownloader = null;
                }

                if (this.recorder != null)
                {
                    // Flush any pending data to be logged
                    var disposable = this.recorder as IDisposable;
                    if (disposable != null)
                    {
                        disposable.Dispose();
                    }

                    this.recorder = null;
                }

                if (this.mwtExplorer != null)
                {
                    this.mwtExplorer = null;
                }
            }
        }
Ejemplo n.º 22
        /// <summary>
        /// Makes three softmax decisions with a non-uniform scorer (context Ids 100, 101, 102) and
        /// checks that each recorded probability differs from the uniform value <c>1 / numActions</c>
        /// and that the interactions are recorded in order.
        /// </summary>
        public void SoftmaxScores()
        {
            uint  numActions = 10;
            float lambda     = 0.5f;
            TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
            TestScorer<TestContext>   scorer   = new TestScorer<TestContext>(numActions, uniform: false);

            MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
            var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

            Random rand = new Random();

            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 100
            });
            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 101
            });
            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 102
            });

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(3, interactions.Count);

            for (int i = 0; i < interactions.Count; i++)
            {
                // Scores are not equal therefore probabilities should not be uniform
                Assert.AreNotEqual(interactions[i].Probability, 1.0f / numActions);
                Assert.AreEqual(100 + i, interactions[i].Context.Id);
            }
        }
Ejemplo n.º 23
        /// <summary>
        /// Sample showing how to make one exploration decision with each explorer kind
        /// (epsilon-greedy, tau-first, bagging, softmax, generic).
        /// </summary>
        /// <remarks>
        /// The branch is selected by the local <c>exploration_type</c>, which is fixed to "greedy",
        /// so only the epsilon-greedy branch runs unless that value is edited.
        /// </remarks>
        public static void Run()
        {
            string exploration_type = "greedy";

            if (exploration_type == "greedy")
            {
                // Initialize Epsilon-Greedy explore algorithm using built-in StringRecorder and SimpleContext types
                StringRecorder<SimpleContext> recorder = new StringRecorder<SimpleContext>();
                MwtExplorer<SimpleContext>    mwtt     = new MwtExplorer<SimpleContext>("mwt", recorder);

                uint          numActions = 10;
                float         epsilon    = 0.2f;
                StringPolicy  policy     = new StringPolicy();
                SimpleContext context    = new SimpleContext(new Feature[]
                {
                    new Feature()
                    {
                        Id = 1, Value = 0.5f
                    },
                    new Feature()
                    {
                        Id = 4, Value = 1.3f
                    },
                    new Feature()
                    {
                        Id = 9, Value = -0.5f
                    }
                });
                uint action = mwtt.ChooseAction(new EpsilonGreedyExplorer<SimpleContext>(policy, epsilon, numActions), "key", context);
            }
            else if (exploration_type == "tau-first")
            {
                // Initialize Tau-First explore algorithm using custom Recorder, Policy & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint     numActions = 10;
                uint     tau        = 0;
                MyPolicy policy     = new MyPolicy();
                uint     action     = mwtt.ChooseAction(new TauFirstExplorer<MyContext>(policy, tau, numActions), "key", new MyContext());
                Console.WriteLine(String.Join(",", recorder.GetData()));
            }
            else if (exploration_type == "bagging")
            {
                // Initialize Bagging explore algorithm using custom Recorder, Policy & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint       numActions = 10;
                uint       numbags    = 2;
                MyPolicy[] policies   = new MyPolicy[numbags];
                for (int i = 0; i < numbags; i++)
                {
                    policies[i] = new MyPolicy(i * 2);
                }
                uint action = mwtt.ChooseAction(new BaggingExplorer<MyContext>(policies, numbags, numActions), "key", new MyContext());
                Console.WriteLine(String.Join(",", recorder.GetData()));
            }
            else if (exploration_type == "softmax")
            {
                // Initialize Softmax explore algorithm using custom Recorder, Scorer & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint     numActions = 10;
                float    lambda     = 0.5f;
                MyScorer scorer     = new MyScorer(numActions);
                uint     action     = mwtt.ChooseAction(new SoftmaxExplorer<MyContext>(scorer, lambda, numActions), "key", new MyContext());

                Console.WriteLine(String.Join(",", recorder.GetData()));
            }
            else if (exploration_type == "generic")
            {
                // Initialize Generic explore algorithm using custom Recorder, Scorer & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint     numActions = 10;
                MyScorer scorer     = new MyScorer(numActions);
                uint     action     = mwtt.ChooseAction(new GenericExplorer<MyContext>(scorer, numActions), "key", new MyContext());

                Console.WriteLine(String.Join(",", recorder.GetData()));
            }
            else
            {  //add error here
            }
        }
        /// <summary>
        /// Sample showing how to make one exploration decision through <c>MwtExplorer.Create</c>.
        /// Epsilon-greedy and tau-first are implemented; bootstrap, softmax and generic are
        /// placeholders marked TODO.
        /// </summary>
        /// <remarks>
        /// The branch is selected by the local <c>exploration_type</c>, which is fixed to "greedy",
        /// so only the epsilon-greedy branch runs unless that value is edited.
        /// </remarks>
        public static void Run()
        {
            string exploration_type = "greedy";

            if (exploration_type == "greedy")
            {
                // Initialize Epsilon-Greedy explore algorithm using built-in StringRecorder and SimpleContext types

                // Creates a recorder of built-in StringRecorder type for string serialization
                StringRecorder<SimpleContext> recorder = new StringRecorder<SimpleContext>();

                int   numActions = 10;
                float epsilon    = 0.2f;
                // Creates an Epsilon-Greedy explorer using the specified settings
                var explorer = new EpsilonGreedyExplorer(epsilon);

                // Creates an MwtExplorer instance using the recorder above
                // Creates a policy that interacts with SimpleContext type
                var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, new StringPolicy());

                // Creates a context of built-in SimpleContext type
                SimpleContext context = new SimpleContext(new float[] { .5f, 1.3f, -.5f });

                // Performs exploration by passing an instance of the Epsilon-Greedy exploration algorithm into MwtExplorer
                // using a sample string to uniquely identify this event
                string uniqueKey = "eventid";
                int    action    = mwtt.ChooseAction(uniqueKey, context);
            }
            else if (exploration_type == "tau-first")
            {
                // Initialize Tau-First explore algorithm using custom Recorder, Policy & Context types
                MyRecorder recorder = new MyRecorder();

                int numActions = 10;
                int tau        = 0;

                var mwtt = MwtExplorer.Create("mwt", numActions, recorder, new TauFirstExplorer(tau), new MyPolicy());

                int action = mwtt.ChooseAction("key", new MyContext());
                Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else if (exploration_type == "bootstrap")
            {
                // TODO: add support for bootstrap
                //// Initialize Bootstrap explore algorithm using custom Recorder, Policy & Context types
                //MyRecorder recorder = new MyRecorder();
                ////MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                //uint numActions = 10;
                //uint numbags = 2;
                //MyPolicy[] policies = new MyPolicy[numbags];
                //for (int i = 0; i < numbags; i++)
                //    policies[i] = new MyPolicy(i * 2);
                //var mwtt = MwtExplorer.Create("mwt", recorder, new BootstrapExplorer(numActions));
                //uint action = mwtt.ChooseAction(new BootstrapExplorer<MyContext>(policies, numActions), "key", new MyContext());
                //Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else if (exploration_type == "softmax")
            {
                // TODO: add support for softmax
                //// Initialize Softmax explore algorithm using custom Recorder, Scorer & Context types
                //MyRecorder recorder = new MyRecorder();
                //MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                //uint numActions = 10;
                //float lambda = 0.5f;
                //MyScorer scorer = new MyScorer(numActions);
                //uint action = mwtt.ChooseAction(new SoftmaxExplorer<MyContext>(scorer, lambda, numActions), "key", new MyContext());

                //Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else if (exploration_type == "generic")
            {
                // TODO: add support for generic
                //// Initialize Generic explore algorithm using custom Recorder, Scorer & Context types
                //MyRecorder recorder = new MyRecorder();
                //MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                //uint numActions = 10;
                //MyScorer scorer = new MyScorer(numActions);
                //uint action = mwtt.ChooseAction(new GenericExplorer<MyContext>(scorer, numActions), "key", new MyContext());

                //Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else
            {  //add error here
            }
        }
Ejemplo n.º 25
        /// <summary>
        /// Micro-benchmark for the explorer: times initialization, action selection and
        /// both ways of reading the interaction log, then prints per-iteration averages
        /// in microseconds.
        /// </summary>
        /// <remarks>
        /// The first <c>numWarmup</c> iterations execute the same work but are excluded
        /// from the accumulated totals.
        /// </remarks>
        public static void Clock()
        {
            float  epsilon         = .2f;
            int    policyParams    = 1003;
            string uniqueKey       = "clock";
            int    numFeatures     = 1000;
            int    numIter         = 1000;
            int    numWarmup       = 100;
            int    numInteractions = 1;
            uint   numActions      = 10;
            string otherContext    = null;

            double timeInit = 0, timeChoose = 0, timeSerializedLog = 0, timeTypedLog = 0;

            System.Diagnostics.Stopwatch watch = new System.Diagnostics.Stopwatch();
            for (int iter = 0; iter < numIter + numWarmup; iter++)
            {
                // The stopwatch must be (re)started before each measured section;
                // a Stopwatch that was never started always reports zero elapsed time.
                watch.Restart();

                MwtExplorer mwt = new MwtExplorer("test");
                mwt.InitializeEpsilonGreedy<int>(epsilon, new StatefulPolicyDelegate<int>(SampleStatefulPolicyFunc), policyParams, numActions);

                timeInit += (iter < numWarmup) ? 0 : watch.Elapsed.TotalMilliseconds;

                // Building the feature array is setup work and is not timed.
                FEATURE[] f = new FEATURE[numFeatures];
                for (int i = 0; i < numFeatures; i++)
                {
                    f[i].Index = (uint)i + 1;
                    f[i].X     = 0.5f;
                }

                CONTEXT context = new CONTEXT(f, otherContext);

                watch.Restart();
                for (int i = 0; i < numInteractions; i++)
                {
                    mwt.ChooseAction(context, uniqueKey);
                }

                timeChoose += (iter < numWarmup) ? 0 : watch.Elapsed.TotalMilliseconds;

                watch.Restart();
                mwt.GetAllInteractionsAsString();

                timeSerializedLog += (iter < numWarmup) ? 0 : watch.Elapsed.TotalMilliseconds;

                // Record interactions again before timing the typed read.
                // NOTE(review): presumably the serialized read above drains the log - confirm.
                for (int i = 0; i < numInteractions; i++)
                {
                    mwt.ChooseAction(context, uniqueKey);
                }

                watch.Restart();
                mwt.GetAllInteractions();

                timeTypedLog += (iter < numWarmup) ? 0 : watch.Elapsed.TotalMilliseconds;
            }

            // Totals are in milliseconds; multiply by 1000 to report microseconds.
            Console.WriteLine("--- PER ITERATION ---");
            Console.WriteLine("# iterations: {0}, # interactions: {1}, # context features {2}", numIter, numInteractions, numFeatures);
            Console.WriteLine("Init: {0} micro", timeInit * 1000 / numIter);
            Console.WriteLine("Choose Action: {0} micro", timeChoose * 1000 / (numIter * numInteractions));
            Console.WriteLine("Get Serialized Log: {0} micro", timeSerializedLog * 1000 / numIter);
            Console.WriteLine("Get Typed Log: {0} micro", timeTypedLog * 1000 / numIter);
            Console.WriteLine("--- TOTAL TIME: {0} micro", (timeInit + timeChoose + timeSerializedLog + timeTypedLog) * 1000);
        }
Ejemplo n.º 26
        /// <summary>
        /// End-to-end sample: initializes an explorer backed by a file logger, chooses an
        /// action for a small context, reports a reward for it, evaluates policies over
        /// the logged interactions, and round-trips interaction and reward data via files.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the reward reporter rejects the reward for the sample join key.
        /// </exception>
        public static void Run()
        {
            string    interactionFile = "serialized.txt";
            MwtLogger logger          = new MwtLogger(interactionFile);

            MwtExplorer mwt = new MwtExplorer("test", logger);

            uint numActions = 10;

            // tau, bags, lambda and customParams are only consumed by the alternative
            // (commented-out) initializations below; they are kept so those lines can be
            // enabled without further edits.
            float epsilon = 0.2f;
            uint  tau     = 0;
            uint  bags    = 2;
            float lambda  = 0.5f;

            int          policyParams = 1003;
            CustomParams customParams = new CustomParams()
            {
                Value1 = policyParams, Value2 = policyParams + 1
            };

            /*** Initialize Epsilon-Greedy explore algorithm using a default policy function that accepts parameters ***/
            mwt.InitializeEpsilonGreedy<int>(epsilon, new StatefulPolicyDelegate<int>(SampleStatefulPolicyFunc), policyParams, numActions);

            /*** Initialize Epsilon-Greedy explore algorithm using a stateless default policy function ***/
            //mwt.InitializeEpsilonGreedy(epsilon, new StatelessPolicyDelegate(SampleStatelessPolicyFunc), numActions);

            /*** Initialize Tau-First explore algorithm using a default policy function that accepts parameters ***/
            //mwt.InitializeTauFirst<CustomParams>(tau, new StatefulPolicyDelegate<CustomParams>(SampleStatefulPolicyFunc), customParams, numActions);

            /*** Initialize Tau-First explore algorithm using a stateless default policy function ***/
            //mwt.InitializeTauFirst(tau, new StatelessPolicyDelegate(SampleStatelessPolicyFunc), numActions);

            /*** Initialize Bagging explore algorithm using a default policy function that accepts parameters ***/
            //StatefulPolicyDelegate<int>[] funcs =
            //{
            //    new StatefulPolicyDelegate<int>(SampleStatefulPolicyFunc),
            //    new StatefulPolicyDelegate<int>(SampleStatefulPolicyFunc2)
            //};
            //int[] parameters = { policyParams, policyParams };
            //mwt.InitializeBagging<int>(bags, funcs, parameters, numActions);

            /*** Initialize Bagging explore algorithm using a stateless default policy function ***/
            //StatelessPolicyDelegate[] funcs =
            //{
            //    new StatelessPolicyDelegate(SampleStatelessPolicyFunc),
            //    new StatelessPolicyDelegate(SampleStatelessPolicyFunc2)
            //};
            //mwt.InitializeBagging(bags, funcs, numActions);

            /*** Initialize Softmax explore algorithm using a default policy function that accepts parameters ***/
            //mwt.InitializeSoftmax<int>(lambda, new StatefulScorerDelegate<int>(SampleStatefulScorerFunc), policyParams, numActions);

            /*** Initialize Softmax explore algorithm using a stateless default policy function ***/
            //mwt.InitializeSoftmax(lambda, new StatelessScorerDelegate(SampleStatelessScorerFunc), numActions);

            FEATURE[] f = new FEATURE[2];
            f[0].X     = 0.5f;
            f[0].Index = 1;
            f[1].X     = 0.9f;
            f[1].Index = 2;

            string  otherContext = "Some other context data that might be helpful to log";
            CONTEXT context      = new CONTEXT(f, otherContext);

            // The same id is used below as the join key when reporting the reward.
            UInt32 chosenAction = mwt.ChooseAction(context, "myId");

            INTERACTION[] interactions = mwt.GetAllInteractions();

            MwtRewardReporter mrr = new MwtRewardReporter(interactions);

            string joinKey = "myId";
            float  reward  = 0.5f;

            if (!mrr.ReportReward(joinKey, reward))
            {
                throw new InvalidOperationException("Failed to report reward for join key '" + joinKey + "'.");
            }

            MwtOptimizer mot = new MwtOptimizer(interactions, numActions);

            float eval1 = mot.EvaluatePolicy(new StatefulPolicyDelegate<int>(SampleStatefulPolicyFunc), policyParams);

            float eval2 = mot.EvaluatePolicyVWCSOAA("model_file");

            // Create a new logger to read back interaction data
            logger = new MwtLogger(interactionFile);
            INTERACTION[] inters = logger.GetAllInteractions();

            // Load and save reward data to file
            string      rewardFile  = "rewards.txt";
            RewardStore rewardStore = new RewardStore(rewardFile);

            rewardStore.Add(new float[2] {
                1.0f, 0.4f
            });

            // Read back reward data
            rewardStore = new RewardStore(rewardFile);
            float[] rewards = rewardStore.GetAllRewards();
        }
Ejemplo n.º 27
        /// <summary>
        /// Checks the softmax explorer with a non-uniform scorer: every interaction is
        /// recorded in order with a non-uniform probability, and the action with the
        /// highest score is the one chosen in the final loop.
        /// </summary>
        public void SoftmaxScores()
        {
            int   numActions = 10;
            float lambda     = 0.5f;
            var   recorder   = new TestRecorder<RegularTestContext>();
            var   scorer     = new TestScorer<RegularTestContext>(1, numActions, uniform: false);

            //var mwtt = new MwtExplorer<RegularTestContext>("mwt", recorder);
            var explorer = new SoftmaxExplorer(lambda);

            var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

            Random rand = new Random();

            mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext()
            {
                Id = 100
            });
            mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext()
            {
                Id = 101
            });
            mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext()
            {
                Id = 102
            });

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(3, interactions.Count);

            for (int i = 0; i < interactions.Count; i++)
            {
                // Scores are not equal therefore probabilities should not be uniform
                Assert.AreNotEqual(1.0f / numActions, interactions[i].Probability);
                Assert.AreEqual(100 + i, interactions[i].Context.Id);
            }

            // Verify that policy action is chosen all the time
            RegularTestContext context = new RegularTestContext {
                Id = 100
            };
            List<float> scores             = scorer.MapContext(context).Value.ToList();
            float       maxScore           = 0;
            int         highestScoreAction = 0;

            // Action ids are one-based, hence the i + 1.
            for (int i = 0; i < scores.Count; i++)
            {
                if (maxScore < scores[i])
                {
                    maxScore           = scores[i];
                    highestScoreAction = i + 1;
                }
            }

            // NOTE(review): with lambda = 0.5 a softmax explorer would be expected to keep
            // randomizing; this assertion presumably relies on exploration being switched
            // off before the loop - confirm against the original test.
            for (int i = 0; i < 1000; i++)
            {
                int chosenAction = mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext()
                {
                    Id = i
                });
                Assert.AreEqual(highestScoreAction, chosenAction);
            }
        }
Ejemplo n.º 28
        /// <summary>
        /// Builds a decision service client: validates and completes the configuration,
        /// selects a recorder, wires up the settings/model blob downloaders and creates
        /// the underlying <c>MwtExplorer</c>.
        /// </summary>
        /// <param name="config">Client configuration; must not be null. Missing upload configurations are filled in with defaults derived from <c>config.DevelopmentMode</c>.</param>
        /// <param name="metaData">Application metadata; stored on the instance and read for the model blob URI, application id, training arguments and initial exploration epsilon.</param>
        /// <param name="internalPolicy">Policy stored on the instance; also probed for <c>INumberOfActionsProvider</c> when the action count is not found in the training arguments.</param>
        /// <param name="initialPolicy">Optional initial policy; cannot be combined with <paramref name="initialExplorer"/>.</param>
        /// <param name="initialFullExplorer">Optional full explorer; defaults to <c>PermutationExplorer(1)</c>.</param>
        /// <param name="initialExplorer">Optional initial explorer; defaults to an epsilon-greedy initial explorer using the metadata's epsilon.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="config"/> is null, or the offline application id is not set.</exception>
        /// <exception cref="ArgumentException">When no <c>INumberOfActionsProvider</c> can be obtained.</exception>
        // NOTE(review): block delimiters, `else` keywords and the bodies of the switch
        // cases appear to be missing from this listing; the comments below follow the
        // indentation and should be confirmed against the original source.
        public DecisionServiceClient(
            DecisionServiceConfiguration config,
            ApplicationClientMetadata metaData,
            IContextMapper <TContext, ActionProbability[]> internalPolicy,
            IContextMapper <TContext, ActionProbability[]> initialPolicy = null,
            IFullExplorer <int[]> initialFullExplorer = null,
            IInitialExplorer <ActionProbability[], int[]> initialExplorer = null)
            if (config == null)
                throw new ArgumentNullException("config");

            // Fill in default batching configurations when the caller supplied none.
            if (config.InteractionUploadConfiguration == null)
                config.InteractionUploadConfiguration = new JoinUploader.BatchingConfiguration(config.DevelopmentMode);

            if (config.ObservationUploadConfiguration == null)
                config.ObservationUploadConfiguration = new JoinUploader.BatchingConfiguration(config.DevelopmentMode);

            this.config = config;
            string appId = string.Empty;

            this.metaData = metaData;

            // Offline mode: record locally and take the application id from the configuration.
            if (config.OfflineMode)
                this.recorder = new OfflineRecorder();
                if (config.OfflineApplicationID == null)
                    throw new ArgumentNullException("OfflineApplicationID", "Offline Application ID must be set explicitly in offline mode.");
                appId = config.OfflineApplicationID;
                // NOTE(review): from here to the appId assignment below looks like the
                // online (non-offline) branch - confirm.
                if (metaData == null)
                    throw new Exception("Unable to locate a registered MWT application.");

                if (this.recorder == null)
                    var joinServerLogger = new JoinServiceLogger <TContext, int[]>(metaData.ApplicationID, config.DevelopmentMode); // TODO: check token remove
                    // NOTE(review): the per-case initialization statements are not visible here.
                    switch (config.JoinServerType)
                    case JoinServerType.CustomSolution:

                    case JoinServerType.AzureStreamAnalytics:
                    this.recorder = (IRecorder <TContext, int[]>)joinServerLogger;

                // A zero polling period selects the default delay; TimeSpan.MinValue skips creating the downloader.
                var settingsBlobPollDelay = config.PollingForSettingsPeriod == TimeSpan.Zero ? DecisionServiceConstants.PollDelay : config.PollingForSettingsPeriod;
                if (settingsBlobPollDelay != TimeSpan.MinValue)
                    this.settingsDownloader             = new AzureBlobBackgroundDownloader(config.SettingsBlobUri, settingsBlobPollDelay, downloadImmediately: false, storageConnectionString: config.AzureStorageConnectionString);
                    this.settingsDownloader.Downloaded += this.UpdateSettings;
                    this.settingsDownloader.Failed     += settingsDownloader_Failed;

                // Same convention for the model blob, except that this downloader is created with downloadImmediately: true.
                var modelBlobPollDelay = config.PollingForModelPeriod == TimeSpan.Zero ? DecisionServiceConstants.PollDelay : config.PollingForModelPeriod;
                if (modelBlobPollDelay != TimeSpan.MinValue)
                    this.modelDownloader             = new AzureBlobBackgroundDownloader(metaData.ModelBlobUri, modelBlobPollDelay, downloadImmediately: true, storageConnectionString: config.AzureStorageConnectionString);
                    this.modelDownloader.Downloaded += this.UpdateContextMapper;
                    this.modelDownloader.Failed     += modelDownloader_Failed;

                appId = metaData.ApplicationID;

            // The recorder doubles as the logger when it implements ILogger (null otherwise).
            this.logger         = this.recorder as ILogger;
            this.internalPolicy = internalPolicy;
            this.initialPolicy  = initialPolicy;

            if (initialExplorer != null && initialPolicy != null)
                throw new Exception("Initial Explorer and Default Policy are both specified but only one can be used.");

            var explorer = new GenericTopSlotExplorer();

            // explorer used if model not ready and defaultAction provided
            // NOTE(review): this.metaData is dereferenced here and in the Regex.Match call
            // below; the only null check on metaData sits in what looks like the online
            // branch, so offline mode with null metadata may fail here - confirm.
            if (initialExplorer == null)
                initialExplorer = new EpsilonGreedyInitialExplorer(this.metaData.InitialExplorationEpsilon);

            // explorer used if model not ready and no default action provided
            if (initialFullExplorer == null)
                initialFullExplorer = new PermutationExplorer(1);

            // Look for a fixed action count in the training arguments ("--cb_explore <n>").
            var match = Regex.Match(metaData.TrainArguments ?? string.Empty, @"--cb_explore\s+(?<numActions>\d+)");

            // Fixed action count found: use a constant provider and pass the count to the explorer.
            if (match.Success)
                var numActions = int.Parse(match.Groups["numActions"].Value);
                this.numActionsProvider = new ConstantNumActionsProvider(numActions);

                this.mwtExplorer = MwtExplorer.Create(appId,
                                                      numActions, this.recorder, explorer, initialPolicy, initialFullExplorer, initialExplorer);
                // NOTE(review): the statements below look like the `else` branch (no fixed
                // action count). initialExplorer has already been defaulted above, so the
                // first half of this condition is always true at this point - confirm intent.
                if (initialExplorer != null || metaData.InitialExplorationEpsilon == 1f) // only needed when full exploration
                    numActionsProvider = internalPolicy as INumberOfActionsProvider <TContext>;
                    if (numActionsProvider == null)
                        numActionsProvider = explorer as INumberOfActionsProvider <TContext>;

                    if (numActionsProvider == null)
                        throw new ArgumentException("Explorer must implement INumberOfActionsProvider interface");

                this.mwtExplorer = MwtExplorer.Create(appId,
                                                      numActionsProvider, this.recorder, explorer, initialPolicy, initialFullExplorer, initialExplorer);
Ejemplo n.º 29
        /// <summary>
        /// Runs the generic explorer over every (experimental unit id, context) pair using
        /// a test scorer selected by <paramref name="policyType"/>, then appends the
        /// recorded interactions to <paramref name="outputFile"/>.
        /// </summary>
        /// <param name="appId">Application id passed to the explorer.</param>
        /// <param name="policyType">0 = fixed all-equal scorer (reads "Score"); 1 = integer-progression scorer (reads "Start"). Any other value does nothing.</param>
        /// <param name="configPolicy">JSON configuration holding the scorer's start value.</param>
        /// <param name="numActions">Number of actions; used directly unless <typeparamref name="TContext"/> implements <c>IVariableActionContext</c>.</param>
        /// <param name="experimentalUnitIdList">Unique ids, one per decision; drives the iteration.</param>
        /// <param name="contextList">Contexts, indexed in step with <paramref name="experimentalUnitIdList"/>.</param>
        /// <param name="outputFile">File the recording is appended to.</param>
        static void ExploreGeneric<TContext>
        (
            string appId,
            int policyType,
            JToken configPolicy,
            int numActions,
            string[] experimentalUnitIdList,
            TContext[] contextList,
            string outputFile
        )
        {
            // The two policy types differ only in how the scorer is built.
            TestScorer<TContext> scorer;
            switch (policyType)
            {
                case 0:     // fixed all-equal scorer
                    scorer = new TestScorer<TContext>(configPolicy["Score"].Value<int>(), numActions);
                    break;

                case 1:     // integer-progression scorer
                    scorer = new TestScorer<TContext>(configPolicy["Start"].Value<int>(), numActions, uniform: false);
                    break;

                default:
                    return;
            }

            var recorder = new StringRecorder<TContext>();
            var explorer = new GenericExplorer();

            // Contexts that carry their own action count get a per-context provider;
            // otherwise the fixed numActions is used.
            bool isVariableActionContext = typeof(IVariableActionContext).IsAssignableFrom(typeof(TContext));

            var mwt = isVariableActionContext ?
                      MwtExplorer.Create(appId, new VariableActionProvider<TContext>(), recorder, explorer, scorer) :
                      MwtExplorer.Create(appId, numActions, recorder, explorer, scorer);

            for (int i = 0; i < experimentalUnitIdList.Length; i++)
            {
                mwt.ChooseAction(experimentalUnitIdList[i], contextList[i]);
            }

            File.AppendAllText(outputFile, recorder.GetRecording());
        }
Ejemplo n.º 30
        /// <summary>
        /// Sample entry point showing how each exploration algorithm is driven through
        /// <c>MwtExplorer</c>. The algorithm is selected by the local
        /// <c>explorationType</c> string, which is set to "greedy".
        /// </summary>
        public static void Run()
        {
            string explorationType = "greedy";

            if (explorationType == "greedy")
            {
                // Initialize Epsilon-Greedy explore algorithm using built-in StringRecorder and SimpleContext types

                // Creates a recorder of built-in StringRecorder type for string serialization
                StringRecorder<SimpleContext> recorder = new StringRecorder<SimpleContext>();

                // Creates an MwtExplorer instance using the recorder above
                MwtExplorer<SimpleContext> mwtt = new MwtExplorer<SimpleContext>("mwt", recorder);

                // Creates a policy that interacts with SimpleContext type
                StringPolicy policy = new StringPolicy();

                uint  numActions = 10;
                float epsilon    = 0.2f;
                // Creates an Epsilon-Greedy explorer using the specified settings
                EpsilonGreedyExplorer<SimpleContext> explorer = new EpsilonGreedyExplorer<SimpleContext>(policy, epsilon, numActions);

                // Creates a context of built-in SimpleContext type
                SimpleContext context = new SimpleContext(new Feature[] {
                    new Feature()
                    {
                        Id = 1, Value = 0.5f
                    },
                    new Feature()
                    {
                        Id = 4, Value = 1.3f
                    },
                    new Feature()
                    {
                        Id = 9, Value = -0.5f
                    }
                });

                // Performs exploration by passing an instance of the Epsilon-Greedy exploration algorithm into MwtExplorer
                // using a sample string to uniquely identify this event
                string uniqueKey = "eventid";
                uint   action    = mwtt.ChooseAction(explorer, uniqueKey, context);
            }
            else if (explorationType == "tau-first")
            {
                // Initialize Tau-First explore algorithm using custom Recorder, Policy & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint     numActions = 10;
                uint     tau        = 0;
                MyPolicy policy     = new MyPolicy();
                uint     action     = mwtt.ChooseAction(new TauFirstExplorer<MyContext>(policy, tau, numActions), "key", new MyContext());
                Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else if (explorationType == "bootstrap")
            {
                // Initialize Bootstrap explore algorithm using custom Recorder, Policy & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint       numActions = 10;
                uint       numbags    = 2;
                MyPolicy[] policies   = new MyPolicy[numbags];
                for (int i = 0; i < numbags; i++)
                {
                    policies[i] = new MyPolicy(i * 2);
                }
                uint action = mwtt.ChooseAction(new BootstrapExplorer<MyContext>(policies, numActions), "key", new MyContext());
                Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else if (explorationType == "softmax")
            {
                // Initialize Softmax explore algorithm using custom Recorder, Scorer & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint     numActions = 10;
                float    lambda     = 0.5f;
                MyScorer scorer     = new MyScorer(numActions);
                uint     action     = mwtt.ChooseAction(new SoftmaxExplorer<MyContext>(scorer, lambda, numActions), "key", new MyContext());

                Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else if (explorationType == "generic")
            {
                // Initialize Generic explore algorithm using custom Recorder, Scorer & Context types
                MyRecorder recorder         = new MyRecorder();
                MwtExplorer<MyContext> mwtt = new MwtExplorer<MyContext>("mwt", recorder);

                uint     numActions = 10;
                MyScorer scorer     = new MyScorer(numActions);
                uint     action     = mwtt.ChooseAction(new GenericExplorer<MyContext>(scorer, numActions), "key", new MyContext());

                Console.WriteLine(String.Join(",", recorder.GetAllInteractions().Select(it => it.Action)));
            }
            else
            {  //add error here
            }
        }