private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
{
    this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
    this.performanceCounters.Stage4_Evaluation_Total.Increment();

    if (trainerResult == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult is null");
        yield break;
    }

    if (trainerResult.Label == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult.Label is null");
        yield break;
    }

    if (trainerResult.ProgressivePrediction == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressivePrediction is null");
        yield break;
    }

    var pi_a_x = trainerResult.Probabilities[trainerResult.Label.Action - 1];
    var p_a_x = trainerResult.Label.Probability * (1 - trainerResult.ProbabilityOfDrop);

    // the latest policy, the one currently being trained
    yield return new EvalData
    {
        PolicyName = "Latest Policy",
        JSON = JsonConvert.SerializeObject(
            new
            {
                name = "Latest Policy",
                // calculate the expectation under the current randomized policy (using the current exploration strategy)
                // VW action is 0-based, label Action is 1-based
                cost = (trainerResult.Label.Cost * pi_a_x) / p_a_x,
                prob = pi_a_x / p_a_x
            })
    };

    // the one currently running
    yield return new EvalData
    {
        PolicyName = "Deployed Policy",
        JSON = JsonConvert.SerializeObject(
            new
            {
                name = "Deployed Policy",
                cost = trainerResult.Label.Cost,
                prob = trainerResult.Label.Probability
            })
    };

    // constant policies: always choose a fixed action, keyed by its tag when available
    for (int action = 1; action <= trainerResult.ProgressivePrediction.Length; action++)
    {
        string tag;
        if (!trainerResult.ActionsTags.TryGetValue(action, out tag))
        {
            tag = action.ToString(CultureInfo.InvariantCulture);
        }

        var name = $"Constant Policy {tag}";

        yield return new EvalData
        {
            PolicyName = name,
            JSON = JsonConvert.SerializeObject(
                new
                {
                    name = name,
                    cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
                    prob = trainerResult.Label.Action == action
                        ? 1 / (trainerResult.Probabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop))
                        : 0
                })
        };
    }
}
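// A minimal sketch of the EvalData DTO populated above. The actual type is not shown in this
// snippet, so the shape below (a policy name plus a serialized JSON payload) is inferred from
// the object initializers and should be treated as an assumption, not the real definition.
public class EvalData
{
    // Friendly policy identifier, e.g. "Latest Policy", "Deployed Policy", "Constant Policy 3".
    public string PolicyName { get; set; }

    // Serialized payload carrying the per-event cost and probability terms for downstream aggregation.
    public string JSON { get; set; }
}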
private IEnumerable<EvalEventData> OfflineEvaluateInternal(TrainerResult trainerResult)
{
    this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
    this.performanceCounters.Stage4_Evaluation_Total.Increment();

    if (trainerResult == null)
    {
        yield break;
    }

    if (trainerResult.Label == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult.Label is null");
        yield break;
    }

    if (trainerResult.ProgressiveProbabilities == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressiveProbabilities is null");
        yield break;
    }

    var pi_a_x = trainerResult.ProgressiveProbabilities[trainerResult.Label.Action - 1];
    var p_a_x = trainerResult.Label.Probability * (1 - trainerResult.ProbabilityOfDrop);

    // the latest policy, the one we're currently training
    yield return new EvalEventData
    {
        Name = "Latest Policy",
        // calculate the expectation under the current randomized policy (using the current exploration strategy)
        // VW action is 0-based, label Action is 1-based
        WeightedCost = (trainerResult.Label.Cost * pi_a_x) / p_a_x,
        ImportanceWeight = pi_a_x / p_a_x
    };

    // the one currently running
    yield return new EvalEventData
    {
        Name = "Deployed Policy",
        WeightedCost = trainerResult.Label.Cost,
        ImportanceWeight = 1 // for the deployed policy just use the observed cost
    };

    // Default policy = always choose the first action supplied by the caller
    yield return new EvalEventData
    {
        Name = "Default Policy",
        WeightedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)1, trainerResult.Label.Cost, trainerResult.Label.Probability),
        ImportanceWeight = trainerResult.Label.Action == 1
            ? 1 / (trainerResult.ObservedProbabilities[0] * (1 - trainerResult.ProbabilityOfDrop))
            : 0
    };

    // per-action-tag constant policies
    for (int action = 1; action <= trainerResult.ProgressiveRanking.Length; action++)
    {
        string tag;
        if (!trainerResult.ActionsTags.TryGetValue(action, out tag))
        {
            tag = action.ToString(CultureInfo.InvariantCulture);
        }

        var name = $"Constant Policy {tag}";

        yield return new EvalEventData
        {
            Name = name,
            WeightedCost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
            ImportanceWeight = trainerResult.Label.Action == action
                ? 1 / (trainerResult.ObservedProbabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop))
                : 0
        };
    }
}
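// The WeightedCost / ImportanceWeight pairs yielded above are the per-event terms of an
// inverse-propensity-score (IPS) estimate. The EvalEventData shape and the aggregation helper
// below are an assumed sketch (not part of this snippet) of how a downstream consumer could
// turn those terms into an average cost per policy; it relies on System, System.Collections.Generic
// and System.Linq.
public class EvalEventData
{
    public string Name { get; set; }
    public float WeightedCost { get; set; }
    public float ImportanceWeight { get; set; }
}

public static class EvalAggregation
{
    // Self-normalized IPS: per policy, sum of weighted costs divided by sum of importance weights.
    public static IDictionary<string, float> AverageCostPerPolicy(IEnumerable<EvalEventData> events)
    {
        return events
            .GroupBy(e => e.Name)
            .ToDictionary(
                g => g.Key,
                g => g.Sum(e => e.WeightedCost) / Math.Max(g.Sum(e => e.ImportanceWeight), 1e-6f));
    }
}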
private IEnumerable<EvalData> OfflineEvaluateInternal(TrainerResult trainerResult)
{
    this.performanceCounters.Stage4_Evaluation_PerSec.Increment();
    this.performanceCounters.Stage4_Evaluation_Total.Increment();

    if (trainerResult == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult is null");
        yield break;
    }

    if (trainerResult.Label == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult.Label is null");
        yield break;
    }

    if (trainerResult.ProgressivePrediction == null)
    {
        this.telemetry.TrackTrace("Received invalid data: trainerResult.ProgressivePrediction is null");
        yield break;
    }

    // the latest policy, the one currently being trained
    yield return new EvalData
    {
        PolicyName = "Latest Policy",
        JSON = JsonConvert.SerializeObject(
            new
            {
                name = "Latest Policy",
                // calculate the expectation under the current randomized policy (using the current exploration strategy)
                // VW action is 0-based, label Action is 1-based
                cost = trainerResult.ProgressivePrediction
                    .Sum(ap => ap.Score * VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, ap.Action + 1, trainerResult.Label.Cost, trainerResult.Label.Probability)),
                prob = trainerResult.ProgressivePrediction
                    .Sum(ap => ap.Score / (trainerResult.Probabilities[ap.Action] * (1 - trainerResult.ProbabilityOfDrop)))
            })
    };

    // the one currently running
    yield return new EvalData
    {
        PolicyName = "Deployed Policy",
        JSON = JsonConvert.SerializeObject(
            new
            {
                name = "Deployed Policy",
                cost = trainerResult.Label.Cost,
                prob = trainerResult.Label.Probability
            })
    };

    // constant policies: always choose a fixed action
    for (int action = 1; action <= trainerResult.ProgressivePrediction.Length; action++)
    {
        yield return new EvalData
        {
            PolicyName = $"Constant Policy {action}",
            JSON = JsonConvert.SerializeObject(
                new
                {
                    name = $"Constant Policy {action}",
                    cost = VowpalWabbitContextualBanditUtil.GetUnbiasedCost(trainerResult.Label.Action, (uint)action, trainerResult.Label.Cost, trainerResult.Label.Probability),
                    prob = trainerResult.Probabilities[action - 1] * (1 - trainerResult.ProbabilityOfDrop)
                })
        };
    }
}
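// Conceptual sketch of the constant-policy term that GetUnbiasedCost is used for above: when the
// candidate (constant) action matches the logged action, the observed cost is re-weighted by the
// inverse of the logged probability; otherwise the event contributes 0. This illustrates the
// standard IPS formula only and is not the VowpalWabbitContextualBanditUtil implementation.
private static float UnbiasedConstantPolicyCost(uint loggedAction, uint candidateAction, float cost, float probability)
{
    return loggedAction == candidateAction ? cost / probability : 0f;
}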