/// <summary>
        /// Builds the offline evaluation records for one trainer result: a "Latest Policy" record,
        /// a "Deployed Policy" record and one "Constant Policy N" record per entry of
        /// <c>ProgressivePrediction</c>. Each record carries a JSON payload with
        /// <c>name</c>, <c>cost</c> and <c>prob</c> fields.
        /// </summary>
        /// <param name="trainerResult">The trainer output to evaluate.</param>
        /// <returns>
        /// A lazily produced sequence of records. The sequence is empty (and a trace is written)
        /// when <paramref name="trainerResult"/>, its <c>Label</c> or its
        /// <c>ProgressivePrediction</c> is null.
        /// </returns>
        /// <remarks>
        /// This is an iterator: the counters are incremented and the checks run when the
        /// result is enumerated, not when the method is called.
        /// </remarks>
        private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
        {
            this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
            this.performanceCounters.Stage4_Evaluation_Total.Increment();

            // Guard clauses: report what is missing and produce nothing.
            if (trainerResult == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult is null");
                yield break;
            }

            var label = trainerResult.Label;
            if (label == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult.Label is null");
                yield break;
            }

            var predictions = trainerResult.ProgressivePrediction;
            if (predictions == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult.ProgressivePrediction is null");
                yield break;
            }

            // Expectation under the current randomized policy (using the current exploration strategy).
            // The VW action is 0-based while the label action is 1-based, hence the "+ 1".
            var latestCost = predictions
                .Sum(scored => scored.Score * VowpalWabbitContextualBanditUtil.GetUnbiasedCost(label.Action, scored.Action + 1, label.Cost, label.Probability));
            var latestProb = predictions
                .Sum(scored => scored.Score / (trainerResult.Probabilities[scored.Action] * (1 - trainerResult.ProbabilityOfDrop)));

            yield return new EvalData
            {
                PolicyName = "Latest Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                    {
                        name = "Latest Policy",
                        cost = latestCost,
                        prob = latestProb
                    })
            };

            // The policy that is currently running: report the logged cost and probability as-is.
            yield return new EvalData
            {
                PolicyName = "Deployed Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                    {
                        name = "Deployed Policy",
                        cost = label.Cost,
                        prob = label.Probability
                    })
            };

            // One record per constant policy; "index" is the 0-based slot, "action" the 1-based id.
            for (int index = 0; index < predictions.Length; index++)
            {
                int action = index + 1;
                var policyName = $"Constant Policy {action}";
                var cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(label.Action, (uint)action, label.Cost, label.Probability);
                var prob = trainerResult.Probabilities[index] * (1 - trainerResult.ProbabilityOfDrop);

                yield return new EvalData
                {
                    PolicyName = policyName,
                    JSON = JsonConvert.SerializeObject(
                        new
                        {
                            name = policyName,
                            cost = cost,
                            prob = prob
                        })
                };
            }
        }
Example #2
0
        /// <summary>
        /// Builds the offline evaluation records for one trainer result: a "Latest Policy" record,
        /// a "Deployed Policy" record and one "Constant Policy &lt;tag&gt;" record per entry of
        /// <c>ProgressivePrediction</c>. Each record carries a JSON payload with
        /// <c>name</c>, <c>cost</c> and <c>prob</c> fields.
        /// </summary>
        /// <param name="trainerResult">The trainer output to evaluate.</param>
        /// <returns>
        /// A lazily produced sequence of records. The sequence is empty (and a trace is written)
        /// when <paramref name="trainerResult"/>, its <c>Label</c> or its
        /// <c>ProgressivePrediction</c> is null.
        /// </returns>
        /// <remarks>
        /// This is an iterator: the counters are incremented and the checks run when the
        /// result is enumerated, not when the method is called.
        /// </remarks>
        private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
        {
            this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
            this.performanceCounters.Stage4_Evaluation_Total.Increment();

            // Guard clauses: report what is missing and produce nothing.
            if (trainerResult == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult is null");
                yield break;
            }

            var label = trainerResult.Label;
            if (label == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult.Label is null");
                yield break;
            }

            var predictions = trainerResult.ProgressivePrediction;
            if (predictions == null)
            {
                this.telemetry.TrackTrace($"Received invalid data: trainerResult.ProgressivePrediction is null");
                yield break;
            }

            // Label.Action is 1-based, so subtract one to index into Probabilities.
            var latestProbability = trainerResult.Probabilities[label.Action - 1];
            // Logged probability scaled by the chance that the event was not dropped.
            var loggedProbability = label.Probability * (1 - trainerResult.ProbabilityOfDrop);

            // Logged cost weighted by the ratio of the latest probability to the logged one.
            yield return new EvalData
            {
                PolicyName = "Latest Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                    {
                        name = "Latest Policy",
                        cost = (label.Cost * latestProbability) / loggedProbability,
                        prob = latestProbability / loggedProbability
                    })
            };

            // The policy that is currently running: report the logged cost and probability as-is.
            yield return new EvalData
            {
                PolicyName = "Deployed Policy",
                JSON = JsonConvert.SerializeObject(
                    new
                    {
                        name = "Deployed Policy",
                        cost = label.Cost,
                        prob = label.Probability
                    })
            };

            // One record per constant policy; "index" is the 0-based slot, "action" the 1-based id.
            for (int index = 0; index < predictions.Length; index++)
            {
                int action = index + 1;

                // Use the configured tag for this action, or the invariant-culture action number if none exists.
                string tag;
                bool tagged = trainerResult.ActionsTags.TryGetValue(action, out tag);
                if (!tagged)
                {
                    tag = action.ToString(CultureInfo.InvariantCulture);
                }

                var name = $"Constant Policy {tag}";
                var cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(label.Action, (uint)action, label.Cost, label.Probability);

                yield return new EvalData
                {
                    PolicyName = name,
                    JSON = JsonConvert.SerializeObject(
                        new
                        {
                            name = name,
                            cost = cost,
                            // Non-zero only for the action that was actually logged.
                            prob = label.Action != action ? 0 : 1 / (trainerResult.Probabilities[index] * (1 - trainerResult.ProbabilityOfDrop))
                        })
                };
            }
        }