/// <summary>
/// Builds a <see cref="LearnedVsConstantPolicy"/> example from the given action data, passes it
/// with <paramref name="label"/> to the wrapped learner's <c>Learn</c> call requesting
/// <c>VowpalWabbitPredictionType.Scalars</c>, and wraps the result in a <see cref="PoliciesPerformance"/>.
/// </summary>
/// <param name="learnedAction">First argument forwarded to the <see cref="LearnedVsConstantPolicy"/> constructor.</param>
/// <param name="numActions">Second argument forwarded to the <see cref="LearnedVsConstantPolicy"/> constructor.</param>
/// <param name="label">Contextual bandit label handed to the learner together with the example.</param>
/// <returns>A <see cref="PoliciesPerformance"/> constructed from the value returned by <c>Learn</c>.</returns>
public PoliciesPerformance Evaluate(uint learnedAction, int numActions, ContextualBanditLabel label)
 {
     var policyExample = new LearnedVsConstantPolicy(learnedAction, numActions);
     var predictions = this.vw.Learn(policyExample, label, VowpalWabbitPredictionType.Scalars);

     return new PoliciesPerformance(predictions);
 }
Пример #2
0
        /// <summary>
        /// Issues four consecutive <c>Learn</c> calls on a "--cb_adf --rank_all --interact ac" model
        /// while the context's <c>Vector</c> and <c>VectorC</c> properties are set to null in turn.
        /// The test contains no assertions; it only requires every call to complete.
        /// </summary>
        public void TestNull3()
        {
            using (var vw = new VowpalWabbit<Context, ADF>("--cb_adf --rank_all --interact ac"))
            {
                // The same label is reused for every learn call below.
                var banditLabel = new ContextualBanditLabel { Action = 1, Cost = 1, Probability = 0.2f };

                var singleAction = new[] { new ADF { ADFID = "23" } }.ToList();
                var context = new Context
                {
                    ID = 25,
                    Vector = new float[] { 3 },
                    VectorC = new float[] { 2, 2, 3 },
                    ActionDependentFeatures = singleAction
                };

                // 1) Both vectors populated.
                vw.Learn(context, context.ActionDependentFeatures, 0, banditLabel);

                // 2) Vector is null, VectorC still populated.
                context.Vector = null;
                vw.Learn(context, context.ActionDependentFeatures, 0, banditLabel);

                // 3) Vector populated again, VectorC is null.
                context.Vector = new float[] { 2 };
                context.VectorC = null;
                vw.Learn(context, context.ActionDependentFeatures, 0, banditLabel);

                // 4) Both vectors are null.
                context.Vector = null;
                vw.Learn(context, context.ActionDependentFeatures, 0, banditLabel);
            }
        }
Пример #3
0
        /// <summary>
        /// Learns once on a "--cb_adf --rank_all --interact ab" model from a context with four
        /// action-dependent feature objects (two with an <c>ADFID</c>, two without), then asserts
        /// that <c>Predict</c> returns four entries, both before and after the remaining
        /// <c>ADFID</c> values are set to null.
        /// </summary>
        public void TestNull5()
        {
            // One prediction entry is expected per action-dependent feature object.
            const int expectedLength = 4;

            using (var vw = new VowpalWabbit<Context, ADF>("--cb_adf --rank_all --interact ab"))
            {
                var banditLabel = new ContextualBanditLabel { Action = 1, Cost = 1, Probability = 0.2f };

                var actions = new[]
                {
                    new ADF { ADFID = "123" },
                    new ADF(),
                    new ADF(),
                    new ADF { ADFID = "4" }
                }.ToList();

                var context = new Context
                {
                    ID = 25,
                    ActionDependentFeatures = actions
                };

                vw.Learn(context, context.ActionDependentFeatures, 0, banditLabel);

                var prediction = vw.Predict(context, context.ActionDependentFeatures);
                Assert.AreEqual(expectedLength, prediction.Length);

                // Clear the two ids that were set, so that every ADFID is now null.
                context.ActionDependentFeatures[0].ADFID = null;
                context.ActionDependentFeatures[3].ADFID = null;

                prediction = vw.Predict(context, context.ActionDependentFeatures);
                Assert.AreEqual(expectedLength, prediction.Length);
            }
        }
Пример #4
0
 /// <summary>
 /// Creates an <see cref="EvalData"/> entry for one policy. Its <c>JSON</c> property holds a
 /// serialized object with the policy name and the cost returned by
 /// <c>VowpalWabbitContextualBanditUtil.GetUnbiasedCost</c>.
 /// </summary>
 /// <param name="label">Label whose <c>Action</c>, <c>Cost</c> and <c>Probability</c> are passed to the cost computation.</param>
 /// <param name="policyName">Stored in <c>PolicyName</c> and emitted as the "name" field of the JSON payload.</param>
 /// <param name="actionTaken">Action passed to the cost computation alongside the label's action.</param>
 /// <returns>A new <see cref="EvalData"/> with <c>PolicyName</c> and <c>JSON</c> populated.</returns>
 private static EvalData Create(ContextualBanditLabel label, string policyName, uint actionTaken)
 {
     var unbiasedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(
         label.Action,
         actionTaken,
         label.Cost,
         label.Probability);

     // Anonymous-type property names become the JSON field names.
     var payload = new { name = policyName, cost = unbiasedCost };

     var evalData = new EvalData();
     evalData.PolicyName = policyName;
     evalData.JSON = JsonConvert.SerializeObject(payload);
     return evalData;
 }