private void EndToEnd(MwtExplorer<SimpleContext> mwtt, IExplorer<SimpleContext> explorer, TestRecorder<SimpleContext> recorder)
{
    uint numActions = 10;
    Random rand = new Random();

    // Exploration phase: choose an action for 1000 randomly generated contexts
    // and record a random reward for each decision.
    List<float> rewards = new List<float>();
    for (int i = 0; i < 1000; i++)
    {
        Feature[] f = new Feature[rand.Next(800, 1201)];
        for (int j = 0; j < f.Length; j++)
        {
            f[j].Id = (uint)(j + 1);
            f[j].Value = (float)rand.NextDouble();
        }

        SimpleContext c = new SimpleContext(f);
        mwtt.ChooseAction(explorer, i.ToString(), c);

        rewards.Add((float)rand.NextDouble());
    }

    // Convert the recorded interactions into the format expected by the reward reporter.
    var testInteractions = recorder.GetAllInteractions();
    Interaction[] partialInteractions = new Interaction[testInteractions.Count];
    for (int i = 0; i < testInteractions.Count; i++)
    {
        partialInteractions[i] = new Interaction()
        {
            ApplicationContext = new OldSimpleContext(testInteractions[i].Context.GetFeatures(), null),
            ChosenAction = testInteractions[i].Action,
            Probability = testInteractions[i].Probability,
            Id = testInteractions[i].UniqueKey
        };
    }

    // Join the observed rewards back onto the logged interactions.
    MwtRewardReporter mrr = new MwtRewardReporter(partialInteractions);
    for (int i = 0; i < partialInteractions.Length; i++)
    {
        Assert.IsTrue(mrr.ReportReward(partialInteractions[i].GetId(), rewards[i]));
    }

    // Train a VW cost-sensitive one-against-all (CSOAA) policy on the complete
    // interactions and verify that a model file is written to disk.
    Interaction[] completeInteractions = mrr.GetAllInteractions();
    MwtOptimizer mop = new MwtOptimizer(completeInteractions, numActions);

    string modelFile = "model";
    mop.OptimizePolicyVWCSOAA(modelFile);
    Assert.IsTrue(System.IO.File.Exists(modelFile));

    // Offline-evaluate the trained policy and make sure the estimate is a real number.
    float evaluatedValue = mop.EvaluatePolicyVWCSOAA(modelFile);
    Assert.IsFalse(float.IsNaN(evaluatedValue));

    System.IO.File.Delete(modelFile);
}
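
// A minimal usage sketch for the helper above, assuming an MSTest test class. The
// MwtExplorer<TContext>, IPolicy<TContext>, and EpsilonGreedyExplorer<TContext> types
// are assumed to come from the exploration library; FixedPolicy is a hypothetical
// stand-in default policy and the TestRecorder default constructor is assumed, so
// exact signatures may differ in this test project.
class FixedPolicy : IPolicy<SimpleContext>
{
    // Always recommend action 1; the epsilon-greedy explorer randomizes around this default.
    public uint ChooseAction(SimpleContext context)
    {
        return 1;
    }
}

[TestMethod]
public void EndToEndEpsilonGreedy()
{
    var recorder = new TestRecorder<SimpleContext>();
    var mwtt = new MwtExplorer<SimpleContext>("end-to-end-test", recorder);

    // Default policy, exploration probability 0.2, and 10 actions
    // (matching numActions inside EndToEnd).
    var explorer = new EpsilonGreedyExplorer<SimpleContext>(new FixedPolicy(), 0.2f, 10u);

    EndToEnd(mwtt, explorer, recorder);
}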