Example no. 1
        /// <summary>
        /// Offline-evaluates a single training result, yielding one <see cref="EvalData"/>
        /// per policy: the latest (in-training) policy, the currently deployed policy, and
        /// one constant policy per action. Invalid input is traced and produces no output.
        /// </summary>
        /// <param name="trainerResult">Per-example trainer output; may be null or partially populated.</param>
        /// <returns>Lazily-evaluated sequence of per-policy evaluation records.</returns>
        private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
        {
            this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
            this.performanceCounters.Stage4_Evaluation_Total.Increment();

            if (trainerResult == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult is null");
                yield break;
            }

            if (trainerResult.Label == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.Label is null");
                yield break;
            }

            if (trainerResult.ProgressivePrediction == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressivePrediction is null");
                yield break;
            }

            // BUG FIX: Probabilities is indexed below but was never null-checked,
            // which would throw a NullReferenceException instead of tracing.
            if (trainerResult.Probabilities == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.Probabilities is null");
                yield break;
            }

            // pi(a|x): probability the current policy assigns to the logged action.
            // VW action is 0-based, label Action is 1-based, hence the -1.
            var pi_a_x = trainerResult.Probabilities[trainerResult.Label.Action - 1];
            // p(a|x): logged probability, corrected for the fraction of events dropped upstream.
            var p_a_x  = trainerResult.Label.Probability * (1 - trainerResult.ProbabilityOfDrop);

            yield return new EvalData
            {
                PolicyName = "Latest Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                {
                    name = "Latest Policy",
                    // calculate expectation under current randomized policy (using current exploration strategy)
                    // via inverse-propensity weighting: cost * pi(a|x) / p(a|x)
                    cost = (trainerResult.Label.Cost * pi_a_x) / p_a_x,
                    prob = pi_a_x / p_a_x
                })
            };

            // the one currently running: observed cost/probability need no reweighting
            yield return new EvalData
            {
                PolicyName = "Deployed Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                {
                    name = "Deployed Policy",
                    cost = trainerResult.Label.Cost,
                    prob = trainerResult.Label.Probability
                })
            };

            // one constant ("always play action a") policy per action slot
            for (int action = 1; action <= trainerResult.ProgressivePrediction.Length; action++)
            {
                string tag;
                // fall back to the numeric action id when no tag was supplied
                // (ActionsTags may legitimately be missing entries; guard the lookup)
                if (trainerResult.ActionsTags == null || !trainerResult.ActionsTags.TryGetValue(action, out tag))
                {
                    tag = action.ToString(CultureInfo.InvariantCulture);
                }

                var name = $"Constant Policy {tag}";
                yield return new EvalData
                {
                    PolicyName = name,
                    JSON = JsonConvert.SerializeObject(
                        new
                    {
                        name = name,
                        cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
                        // importance weight is nonzero only when the logged action matches this constant policy
                        prob = trainerResult.Label.Action == action ? 1 / (trainerResult.Probabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop)) : 0
                    })
                };
            }
        }
        /// <summary>
        /// Offline-evaluates a single training result, yielding one <see cref="EvalEventData"/>
        /// per policy: the latest (in-training) policy, the deployed policy, the default
        /// (always action 1) policy, and one constant policy per ranked action.
        /// Invalid input is traced and produces no output.
        /// </summary>
        /// <param name="trainerResult">Per-example trainer output; may be null or partially populated.</param>
        /// <returns>Lazily-evaluated sequence of per-policy evaluation events.</returns>
        private IEnumerable<EvalEventData> OfflineEvaluateInternal(TrainerResult trainerResult)
        {
            this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
            this.performanceCounters.Stage4_Evaluation_Total.Increment();

            if (trainerResult == null)
            {
                // BUG FIX: this branch silently dropped the event; trace it like the other guards
                this.telemetry.TrackTrace("Received invalid data: trainerResult is null");
                yield break;
            }

            if (trainerResult.Label == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.Label is null");
                yield break;
            }

            if (trainerResult.ProgressiveProbabilities == null)
            {
                // BUG FIX: message previously named the wrong property ("Probabilities")
                this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressiveProbabilities is null");
                yield break;
            }

            // BUG FIX: both are dereferenced below but were never null-checked
            if (trainerResult.ObservedProbabilities == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.ObservedProbabilities is null");
                yield break;
            }

            if (trainerResult.ProgressiveRanking == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressiveRanking is null");
                yield break;
            }

            // pi(a|x): probability the current policy assigns to the logged action.
            // VW action is 0-based, label Action is 1-based, hence the -1.
            var pi_a_x = trainerResult.ProgressiveProbabilities[trainerResult.Label.Action - 1];
            // p(a|x): logged probability, corrected for the fraction of events dropped upstream.
            var p_a_x  = trainerResult.Label.Probability * (1 - trainerResult.ProbabilityOfDrop);

            // the latest one we're currently training
            yield return new EvalEventData
            {
                Name = "Latest Policy",
                // calculate expectation under current randomized policy (using current exploration strategy)
                // via inverse-propensity weighting: cost * pi(a|x) / p(a|x)
                WeightedCost = (trainerResult.Label.Cost * pi_a_x) / p_a_x,
                ImportanceWeight = pi_a_x / p_a_x
            };

            // the one currently running
            yield return new EvalEventData
            {
                Name = "Deployed Policy",
                WeightedCost = trainerResult.Label.Cost,
                ImportanceWeight = 1 // for deployed policy just use the observed cost
            };

            // Default = choosing the action that's supplied by caller (always action 1)
            yield return new EvalEventData
            {
                Name = "Default Policy",
                WeightedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)1, trainerResult.Label.Cost, trainerResult.Label.Probability),
                ImportanceWeight = trainerResult.Label.Action == 1 ? 1 / (trainerResult.ObservedProbabilities[0] * (1 - trainerResult.ProbabilityOfDrop)) : 0
            };

            // per action tag policies: one constant ("always play action a") policy per slot
            for (int action = 1; action <= trainerResult.ProgressiveRanking.Length; action++)
            {
                string tag;
                // fall back to the numeric action id when no tag was supplied
                // (ActionsTags may legitimately be missing entries; guard the lookup)
                if (trainerResult.ActionsTags == null || !trainerResult.ActionsTags.TryGetValue(action, out tag))
                {
                    tag = action.ToString(CultureInfo.InvariantCulture);
                }

                var name = $"Constant Policy {tag}";
                yield return new EvalEventData
                {
                    Name = name,
                    WeightedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
                    // importance weight is nonzero only when the logged action matches this constant policy
                    ImportanceWeight = trainerResult.Label.Action == action ? 1 / (trainerResult.ObservedProbabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop)) : 0
                };
            }
        }
Example no. 3
        /// <summary>
        /// Offline-evaluates a single training result, yielding one <see cref="EvalData"/>
        /// per policy: the latest (in-training) policy evaluated as a full expectation over
        /// the progressive prediction, the deployed policy, and one constant policy per action.
        /// Invalid input is traced and produces no output.
        /// </summary>
        /// <param name="trainerResult">Per-example trainer output; may be null or partially populated.</param>
        /// <returns>Lazily-evaluated sequence of per-policy evaluation records.</returns>
        private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
        {
            this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
            this.performanceCounters.Stage4_Evaluation_Total.Increment();

            if (trainerResult == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult is null");
                yield break;
            }

            if (trainerResult.Label == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.Label is null");
                yield break;
            }

            if (trainerResult.ProgressivePrediction == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressivePrediction is null");
                yield break;
            }

            // BUG FIX: Probabilities is indexed below but was never null-checked,
            // which would throw a NullReferenceException instead of tracing.
            if (trainerResult.Probabilities == null)
            {
                this.telemetry.TrackTrace("Received invalid data: trainerResult.Probabilities is null");
                yield break;
            }

            yield return new EvalData
            {
                PolicyName = "Latest Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                {
                    name = "Latest Policy",
                    // calculate expectation under current randomized policy (using current exploration strategy)
                    // VW action is 0-based, label Action is 1-based
                    cost = trainerResult.ProgressivePrediction
                           .Sum(ap => ap.Score * VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, ap.Action + 1, trainerResult.Label.Cost, trainerResult.Label.Probability)),
                    prob = trainerResult.ProgressivePrediction
                           .Sum(ap => ap.Score / (trainerResult.Probabilities[ap.Action] * (1 - trainerResult.ProbabilityOfDrop)))
                })
            };

            // the one currently running: observed cost/probability need no reweighting
            yield return new EvalData
            {
                PolicyName = "Deployed Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                {
                    name = "Deployed Policy",
                    cost = trainerResult.Label.Cost,
                    prob = trainerResult.Label.Probability
                })
            };

            // one constant ("always play action a") policy per action slot.
            // BUG FIX: the yield return was missing its terminating semicolon and a
            // stray ';' sat after the loop's closing brace — the block did not compile.
            for (int action = 1; action <= trainerResult.ProgressivePrediction.Length; action++)
            {
                yield return new EvalData
                {
                    PolicyName = $"Constant Policy {action}",
                    JSON = JsonConvert.SerializeObject(
                        new
                    {
                        name = $"Constant Policy {action}",
                        cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
                        prob = trainerResult.Probabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop)
                    })
                };
            }
        }