示例#1
0
        /// <summary>
        /// Drives a full explore / report-reward / optimize / evaluate cycle:
        /// requests an action for 1000 randomly generated contexts, converts the
        /// interactions captured by <paramref name="recorder"/> into
        /// <c>Interaction</c> objects, reports a random reward for each of them,
        /// then trains a policy into a model file and evaluates it.
        /// </summary>
        /// <param name="mwtt">Explorer front-end used to choose an action for every generated context.</param>
        /// <param name="explorer">Exploration algorithm handed to <paramref name="mwtt"/> on every call.</param>
        /// <param name="recorder">
        /// Recorder whose interactions are read back after the exploration loop.
        /// NOTE(review): presumably this is the recorder <paramref name="mwtt"/> was constructed with - confirm at the call sites.
        /// </param>
        private void EndToEnd(MwtExplorer<SimpleContext> mwtt, IExplorer<SimpleContext> explorer, TestRecorder<SimpleContext> recorder)
        {
            const int decisionCount = 1000;
            const int randomSeed = 42;
            const string modelFile = "model";

            uint numActions = 10;

            // Fixed seed: the generated contexts and rewards are identical on every run,
            // so a failure can be reproduced.
            Random rand = new Random(randomSeed);

            List<float> rewards = new List<float>(decisionCount);

            for (int i = 0; i < decisionCount; i++)
            {
                // Each context carries between 800 and 1200 features (upper bound of Next is exclusive).
                Feature[] f = new Feature[rand.Next(800, 1201)];
                for (int j = 0; j < f.Length; j++)
                {
                    f[j].Id    = (uint)(j + 1);
                    f[j].Value = (float)rand.NextDouble();
                }
                SimpleContext c = new SimpleContext(f);

                // The chosen action is not needed here; the interaction is read back from the recorder below.
                mwtt.ChooseAction(explorer, i.ToString(), c);

                rewards.Add((float)rand.NextDouble());
            }

            var testInteractions = recorder.GetAllInteractions();

            // rewards is indexed by interaction position below; fail with a clear message
            // instead of an ArgumentOutOfRangeException if the recorder holds more entries.
            Assert.IsTrue(
                testInteractions.Count <= rewards.Count,
                "Recorder holds more interactions than rewards were generated.");

            Interaction[] partialInteractions = new Interaction[testInteractions.Count];
            for (int i = 0; i < testInteractions.Count; i++)
            {
                partialInteractions[i] = new Interaction()
                {
                    ApplicationContext = new OldSimpleContext(testInteractions[i].Context.GetFeatures(), null),
                    ChosenAction       = testInteractions[i].Action,
                    Probability        = testInteractions[i].Probability,
                    Id                 = testInteractions[i].UniqueKey
                };
            }

            MwtRewardReporter mrr = new MwtRewardReporter(partialInteractions);

            for (int i = 0; i < partialInteractions.Length; i++)
            {
                Assert.IsTrue(
                    mrr.ReportReward(partialInteractions[i].GetId(), rewards[i]),
                    "ReportReward failed for interaction at index " + i);
            }

            Interaction[] completeInteractions = mrr.GetAllInteractions();
            MwtOptimizer  mop = new MwtOptimizer(completeInteractions, numActions);

            // Remove any model left behind by an earlier aborted run so the existence
            // check below really proves that this run produced the file.
            System.IO.File.Delete(modelFile);

            try
            {
                mop.OptimizePolicyVWCSOAA(modelFile);

                Assert.IsTrue(System.IO.File.Exists(modelFile));

                float evaluatedValue = mop.EvaluatePolicyVWCSOAA(modelFile);

                Assert.IsFalse(float.IsNaN(evaluatedValue));
            }
            finally
            {
                // Clean up even when an assertion or the optimizer throws.
                System.IO.File.Delete(modelFile);
            }
        }