/// <summary>
/// Converts one trainer result into a sequence of per-policy evaluation records.
/// </summary>
/// <param name="trainerResult">Trainer output to evaluate; may be null or incomplete.</param>
/// <returns>
/// In order: a "Latest Policy" record, a "Deployed Policy" record, and one
/// "Constant Policy N" record for N = 1..ProgressivePrediction.Length.
/// Nothing is produced (and a trace is written) when <paramref name="trainerResult"/>,
/// its Label, or its ProgressivePrediction is null.
/// </returns>
/// <remarks>
/// This is an iterator, so none of the work below (including the counter
/// increments) happens until the returned sequence is enumerated.
/// </remarks>
private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
{
    this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
    this.performanceCounters.Stage4_Evaluation_Total.Increment();

    // Validate the input piece by piece; the first missing part is traced and ends the sequence.
    if (trainerResult == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult is null");
        yield break;
    }

    var label = trainerResult.Label;
    if (label == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult.Label is null");
        yield break;
    }

    var predictions = trainerResult.ProgressivePrediction;
    if (predictions == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult.ProgressivePrediction is null");
        yield break;
    }

    // Latest policy: score-weighted sums over every predicted action.
    // Predicted actions are 0-based whereas the label's Action is 1-based,
    // which is why ap.Action is shifted by one for the cost lookup only.
    var latestCost = predictions.Sum(
        ap => ap.Score * VowpalWabbitContextualBanditUtil.GetUnbiasedCost(
            label.Action,
            ap.Action + 1,
            label.Cost,
            label.Probability));
    var latestProb = predictions.Sum(
        ap => ap.Score / (trainerResult.Probabilities[ap.Action] * (1 - trainerResult.ProbabilityOfDrop)));
    var latestJson = JsonConvert.SerializeObject(
        new
        {
            name = "Latest Policy",
            cost = latestCost,
            prob = latestProb
        });

    yield return new EvalData
    {
        PolicyName = "Latest Policy",
        JSON = latestJson
    };

    // Deployed policy: the label's cost and probability are reported unchanged.
    var deployedJson = JsonConvert.SerializeObject(
        new
        {
            name = "Deployed Policy",
            cost = trainerResult.Label.Cost,
            prob = trainerResult.Label.Probability
        });

    yield return new EvalData
    {
        PolicyName = "Deployed Policy",
        JSON = deployedJson
    };

    // Constant policies: one record per action, numbered from 1.
    for (var index = 0; index < predictions.Length; index++)
    {
        var action = index + 1;
        var policyName = $"Constant Policy {action}";

        var constantJson = JsonConvert.SerializeObject(
            new
            {
                name = policyName,
                cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(
                    trainerResult.Label.Action,
                    (uint)action,
                    trainerResult.Label.Cost,
                    trainerResult.Label.Probability),
                prob = trainerResult.Probabilities[index] * (1 - trainerResult.ProbabilityOfDrop)
            });

        yield return new EvalData
        {
            PolicyName = policyName,
            JSON = constantJson
        };
    }
}
/// <summary>
/// Builds the offline-evaluation records for a single trainer result.
/// </summary>
/// <param name="trainerResult">Trainer output to evaluate; may be null or incomplete.</param>
/// <returns>
/// In order: a "Latest Policy" record, a "Deployed Policy" record, and one
/// "Constant Policy {tag}" record per entry of ProgressivePrediction, where the tag
/// comes from ActionsTags when available and is the 1-based action number otherwise.
/// Nothing is produced (and a trace is written) when <paramref name="trainerResult"/>,
/// its Label, its ProgressivePrediction, or its Probabilities is null.
/// </returns>
/// <remarks>
/// This is an iterator, so none of the work below (including the counter
/// increments) happens until the returned sequence is enumerated.
/// </remarks>
private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
{
    this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
    this.performanceCounters.Stage4_Evaluation_Total.Increment();

    if (trainerResult == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult is null");
        yield break;
    }

    if (trainerResult.Label == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult.Label is null");
        yield break;
    }

    if (trainerResult.ProgressivePrediction == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult.ProgressivePrediction is null");
        yield break;
    }

    // Probabilities is indexed unconditionally right below, so a null value would
    // otherwise surface as a NullReferenceException in the middle of enumeration.
    // Treat it like the other missing inputs instead.
    if (trainerResult.Probabilities == null)
    {
        this.telemetry.TrackTrace($"Received invalid data: trainerResult.Probabilities is null");
        yield break;
    }

    // Terms of the ratio used for the latest policy. The label's Action is 1-based
    // while Probabilities is indexed from 0, hence the "- 1".
    // NOTE(review): a Label.Action outside the valid index range still throws here —
    // confirm that upstream guarantees the range.
    var pi_a_x = trainerResult.Probabilities[trainerResult.Label.Action - 1];
    var p_a_x = trainerResult.Label.Probability * (1 - trainerResult.ProbabilityOfDrop);

    yield return new EvalData
    {
        PolicyName = "Latest Policy",
        JSON = JsonConvert.SerializeObject(
            new
            {
                name = "Latest Policy",
                cost = (trainerResult.Label.Cost * pi_a_x) / p_a_x,
                prob = pi_a_x / p_a_x
            })
    };

    // The policy that is currently running: report the label's values unchanged.
    yield return new EvalData
    {
        PolicyName = "Deployed Policy",
        JSON = JsonConvert.SerializeObject(
            new
            {
                name = "Deployed Policy",
                cost = trainerResult.Label.Cost,
                prob = trainerResult.Label.Probability
            })
    };

    // One constant policy per action; actions are numbered from 1 here.
    for (int action = 1; action <= trainerResult.ProgressivePrediction.Length; action++)
    {
        // Prefer the configured tag for this action. A missing dictionary is handled
        // the same way as a missing entry: fall back to the culture-invariant number.
        string tag = null;
        if (trainerResult.ActionsTags == null || !trainerResult.ActionsTags.TryGetValue(action, out tag))
        {
            tag = action.ToString(CultureInfo.InvariantCulture);
        }

        var name = $"Constant Policy {tag}";

        yield return new EvalData
        {
            PolicyName = name,
            JSON = JsonConvert.SerializeObject(
                new
                {
                    name = name,
                    cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(
                        trainerResult.Label.Action,
                        (uint)action,
                        trainerResult.Label.Cost,
                        trainerResult.Label.Probability),
                    // Non-zero only for the action that the label actually records.
                    prob = trainerResult.Label.Action == action
                        ? 1 / (trainerResult.Probabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop))
                        : 0
                })
        };
    }
}