/// <summary>
/// Evaluates the model on the given split by writing, for every candidate user,
/// a single line "userId item:score item:score ..." with the top-scored candidate
/// items, then records the run parameters in the evaluation context.
/// </summary>
/// <param name="context">Evaluation context that collects the results set.</param>
/// <param name="model">The trained model used to score candidate items.</param>
/// <param name="split">The train/test split being evaluated.</param>
public override void Evaluate(EvaluationContext context, Model model, Split split)
{
    split.UpdateFeedbackSlices();
    Initialize(split);

    // If mode is explicit, make sure all item ids are added to the ItemsMap
    // dictionary so the recommender can resolve every explicit candidate.
    if (CandidateItemsMode == CandidateItems.EXPLICIT && model is MmlRecommender)
    {
        foreach (string itemId in _allCandidateItems)
        {
            ((MmlRecommender)model).ItemsMap.ToInternalID(itemId);
        }
    }

    var candidateUsers = GetCandidateUsers(split);
    var output = new List<string>();
    // FIX: List<T>.Add is not thread-safe; concurrent adds from Parallel.ForEach
    // can corrupt the list or silently drop lines. Guard writes with a gate.
    var outputGate = new object();
    int maxCutOff = CutOffs.Max();

    Parallel.ForEach(candidateUsers, u =>
    {
        var scoredCandidateItems = GetScoredCandidateItems(model, split, u);

        // For this evaluator only the max of NumCandidates and CutOffs is considered.
        var rankedList = scoredCandidateItems.OrderByDescending(i => i.Item2).Take(maxCutOff);

        // FIX: string.Join instead of Aggregate — Aggregate throws
        // InvalidOperationException when a user has no scored candidates.
        string line = u.Id + " " + string.Join(" ",
            rankedList.Select(r => string.Format("{0}:{1:0.0000}", r.Item1, r.Item2)));

        lock (outputGate)
        {
            output.Add(line);
        }
    });

    // Derive a per-split, per-model file name from the configured output path.
    OutputFile = string.Format("{0}_{1}_{2}.{3}",
        OutputFile.GetPathWithoutExtension(), split.Id, model.Id, OutputFile.GetFileExtension());
    File.WriteAllLines(OutputFile, output);

    var results = new Dictionary<string, string>();
    results.Add("CandidatesMode", CandidateItemsMode.ToString());
    if (CandidateItemsMode == CandidateItems.EXPLICIT)
    {
        results.Add("CandidatesFile", CandidateItemsFile.Substring(CandidateItemsFile.LastIndexOf('\\') + 1));
    }
    results.Add("NumCandidates", NumCandidates.Max().ToString());
    results.Add("CutOff", maxCutOff.ToString());
    results.Add("OutputFile", OutputFile);
    results.Add("EvalMethod", GetEvaluatorName());

    context.AddResultsSet("rankingMeasures", results);
}
/// <summary>
/// Evaluates the model on the given split with conventional top-k ranking measures
/// (Precision, Recall, MAP, MRR, NDCG plus recommendation diversity/coverage),
/// computed for every (numCandidates, cutOff) combination and stored in the context.
/// Optionally writes per-user recall metrics when _perUserMetrics is configured.
/// </summary>
/// <param name="context">Evaluation context that collects one results set per (numCandidates, cutOff).</param>
/// <param name="model">The trained model used to score relevant and candidate items.</param>
/// <param name="split">The train/test split being evaluated.</param>
public override void Evaluate(EvaluationContext context, Model model, Split split)
{
    split.UpdateFeedbackSlices();
    Initialize(split);

    // If mode is explicit, make sure all item ids are added to the ItemsMap
    // dictionary so the recommender can resolve every explicit candidate.
    if (CandidateItemsMode == CandidateItems.EXPLICIT && model is MmlRecommender)
    {
        foreach (string itemId in _allCandidateItems)
        {
            ((MmlRecommender)model).ItemsMap.ToInternalID(itemId);
        }
    }

    var testUsers = GetCandidateUsers(split);
    int testedUsersCount = 0, testedCases = 0;

    // Measure accumulators keyed by (numCandidates, cutOff).
    var precision = new MultiKeyDictionary<int, int, double>();
    var recall = new MultiKeyDictionary<int, int, double>();
    var ndcg = new MultiKeyDictionary<int, int, double>();
    var mrrs = new MultiKeyDictionary<int, int, double>();
    var maps = new MultiKeyDictionary<int, int, double>();
    var errs = new MultiKeyDictionary<int, int, double>();
    var distinctItems = new MultiKeyDictionary<int, int, List<string>>();

    // Pre-compute IDCGs for speed up.
    var idcgs = new Dictionary<int, double>();
    for (int k = 1; k <= CutOffs.Max(); k++)
    {
        idcgs[k] = Enumerable.Range(1, k).Sum(i => 1.0 / Math.Log(i + 1, 2));
    }

    // Initialize measures (and the optional per-user metric writers).
    foreach (int maxCand in NumCandidates)
    {
        foreach (int k in CutOffs)
        {
            precision[maxCand, k] = 0;
            recall[maxCand, k] = 0;
            ndcg[maxCand, k] = 0;
            mrrs[maxCand, k] = 0;
            errs[maxCand, k] = 0;
            maps[maxCand, k] = 0;
            distinctItems[maxCand, k] = new List<string>();

            if (_perUserMetrics != null)
            {
                string path = SetupParameters["userMetricsFile"];
                _perUserMetrics[maxCand, k] = new StreamWriter(
                    string.Format("{0}_{1}_{2}_{3}_{4}.{5}",
                        path.GetPathWithoutExtension(), split.Id, model.Id, maxCand, k, path.GetFileExtension()));
                _perUserMetrics[maxCand, k].WriteLine("UserId\tRecall");
            }
        }
    }

    // FIX: the shared counters and all measure dictionaries were mutated from inside
    // Parallel.ForEach without synchronization — a data race that silently loses
    // updates. Per-user contributions are now computed locally and published under a
    // dedicated gate object (lock(this) is an anti-pattern: callers could lock it too).
    var gate = new object();

    Parallel.ForEach(testUsers, u =>
    {
        // The followings are heavy processes; materialize once to prevent over-computing.
        var scoredRelevantItems = GetScoredRelevantItems(model, split, u).ToList();
        var scoredCandidateItems = GetScoredCandidateItems(model, split, u).ToList();

        if (scoredRelevantItems.Count == 0)
        {
            return;
        }

        lock (gate)
        {
            testedUsersCount++;
            testedCases += scoredRelevantItems.Count;
        }

        // Calculating measures for each numCandidates and cutoff.
        foreach (int maxCand in NumCandidates)
        {
            var rankedList = scoredCandidateItems.Take(maxCand)
                .Union(scoredRelevantItems)
                .OrderByDescending(i => i.Item2)
                .ToList();

            foreach (int k in CutOffs)
            {
                var topkItems = rankedList.Take(k).Select(ri => ri.Item1).ToList();

                // Calculate precision and recall with the conventional method:
                // hits are the relevant items that made it into the top-k list.
                var hitsAtK = topkItems.Intersect(scoredRelevantItems.Select(i => i.Item1));

                int hitCount = 0;
                double dcg = 0;
                int lowestRank = int.MaxValue;
                double map = 0;
                double err = 0;

                foreach (string item in hitsAtK)
                {
                    hitCount++;
                    int rank = topkItems.IndexOf(item);
                    dcg += 1.0 / Math.Log(rank + 2, 2);
                    map += (double)hitCount / (rank + 1);
                    err += 1.0f / (rank + 1);
                    if (rank < lowestRank)
                    {
                        lowestRank = rank;
                    }
                }

                int minRelevant = Math.Min(k, scoredRelevantItems.Count);
                double rc = (double)hitCount / scoredRelevantItems.Count;

                // MRR implemented based on https://en.wikipedia.org/wiki/Mean_reciprocal_rank
                float m = (lowestRank < int.MaxValue) ? 1.0f / (lowestRank + 1) : 0;

                lock (gate)
                {
                    // Diversity of recommendations in terms of number of distinct items.
                    distinctItems[maxCand, k] = distinctItems[maxCand, k].Union(topkItems).Distinct().ToList();

                    precision[maxCand, k] += (double)hitCount / k;
                    recall[maxCand, k] += rc;
                    ndcg[maxCand, k] += dcg / idcgs[minRelevant];
                    maps[maxCand, k] += map / minRelevant;
                    mrrs[maxCand, k] += m;
                    errs[maxCand, k] += err;

                    if (_perUserMetrics != null)
                    {
                        _perUserMetrics[maxCand, k].WriteLine("{0}\t{1:0.0000}", u.Id, m);
                    }
                }
            }
        }
    });

    // Aggregating measures and storing the results.
    foreach (int maxCand in NumCandidates)
    {
        foreach (int k in CutOffs)
        {
            precision[maxCand, k] /= testedUsersCount;
            recall[maxCand, k] /= testedUsersCount;
            maps[maxCand, k] /= testedUsersCount;
            ndcg[maxCand, k] /= testedUsersCount;
            mrrs[maxCand, k] /= testedUsersCount;
            // NOTE(review): errs is accumulated but never averaged or reported —
            // confirm whether it should be added to the results set.

            var results = new Dictionary<string, string>();
            results.Add("TestedCases", testedCases.ToString());
            results.Add("CandidatesMode", CandidateItemsMode.ToString());
            if (CandidateItemsMode == CandidateItems.EXPLICIT)
            {
                results.Add("CandidatesFile", CandidateItemsFile.Substring(CandidateItemsFile.LastIndexOf('\\') + 1));
            }
            results.Add("NumCandidates", maxCand == int.MaxValue ? "max" : maxCand.ToString());
            results.Add("AllCandidates", _allCandidateItems.Count.ToString());
            results.Add("CutOff", k.ToString());
            results.Add("Precision", string.Format("{0:0.0000}", precision[maxCand, k]));
            results.Add("Recall", string.Format("{0:0.0000}", recall[maxCand, k]));
            results.Add("MAP", string.Format("{0:0.0000}", maps[maxCand, k]));
            results.Add("MRR", string.Format("{0:0.0000}", mrrs[maxCand, k]));
            results.Add("NDCG", string.Format("{0:0.0000}", ndcg[maxCand, k]));
            results.Add("TotalRecomItems", distinctItems[maxCand, k].Count.ToString());
            results.Add("%Coverage", string.Format("{0:0.00}", (100f * distinctItems[maxCand, k].Count / _allCandidateItems.Count)));
            results.Add("EvalMethod", GetEvaluatorName());

            context.AddResultsSet("rankingMeasures", results);

            if (_perUserMetrics != null)
            {
                _perUserMetrics[maxCand, k].Flush();
                _perUserMetrics[maxCand, k].Close();
            }
        }
    }
}
/// <summary>
/// Evaluates the model on the given split with the One-plus-random method: each
/// relevant item is ranked against a set of scored candidate items, and Recall,
/// MRR and NDCG are accumulated per (numCandidates, cutOff) combination. Because
/// each test case has exactly one relevant item, recall (and IDCG) per case is 1
/// when the item appears in the top k. Optionally writes per-prediction lines
/// when _predictionWriter is configured.
/// </summary>
/// <param name="context">Evaluation context that collects one results set per (numCandidates, cutOff).</param>
/// <param name="model">The trained model used to score relevant and candidate items.</param>
/// <param name="split">The train/test split being evaluated.</param>
public override void Evaluate(EvaluationContext context, Model model, Split split)
{
    split.UpdateFeedbackSlices();
    Initialize(split);

    var testUsers = GetCandidateUsers(split);
    int testedCases = 0;

    // One-plus-random accumulators keyed by (numCandidates, cutOff).
    var recallsOpr = new MultiKeyDictionary<int, int, double>();
    var ndcgOpr = new MultiKeyDictionary<int, int, double>();
    var mrrOpr = new MultiKeyDictionary<int, int, double>();

    // Initialize measures (and the optional prediction writers).
    foreach (int maxCand in NumCandidates)
    {
        foreach (int k in CutOffs)
        {
            recallsOpr[maxCand, k] = 0;
            ndcgOpr[maxCand, k] = 0;
            mrrOpr[maxCand, k] = 0;

            if (_predictionWriter != null)
            {
                string path = SetupParameters["predictionFile"];
                _predictionWriter[maxCand, k] = new StreamWriter(
                    string.Format("{0}_{1}_{2}_{3}_{4}.{5}",
                        path.GetPathWithoutExtension(), split.Id, model.Id, maxCand, k, path.GetFileExtension()));
                _predictionWriter[maxCand, k].WriteLine("UserId,ItemId,Score,IsCorrect");
            }
        }
    }

    // FIX: testedCases and the measure dictionaries were mutated from inside
    // Parallel.ForEach without synchronization (data race, lost updates), and the
    // prediction writer was guarded with lock(this) — an anti-pattern because
    // external code can also lock the same instance. Use one private gate object.
    var gate = new object();

    Parallel.ForEach(testUsers, u =>
    {
        // The followings are heavy processes; materialize once to prevent over-computing.
        var scoredRelevantItems = GetScoredRelevantItems(model, split, u).ToList();
        if (scoredRelevantItems.Count == 0)
        {
            return;
        }

        lock (gate)
        {
            testedCases += scoredRelevantItems.Count;
        }

        // Calculating measures for each numCandidates and cutoff.
        foreach (int maxCand in NumCandidates)
        {
            // Calculate recall with the One-plus-random method: rank each relevant
            // item within the (truncated) list of scored candidates.
            foreach (Tuple<string, float> item in scoredRelevantItems)
            {
                var scoredCandidateItems = GetScoredCandidateItems(model, split, u, item.Item1).ToList();
                var candidatesRankedList = scoredCandidateItems.Take(maxCand).OrderByDescending(i => i.Item2).ToList();
                int rank = IndexOfNewItem(candidatesRankedList, item.Item2);

                foreach (int k in CutOffs)
                {
                    // If the relevant item falls into the top k items, recall is one
                    // (the only relevant item is covered); IDCG is one as well
                    // (relevant item appearing in the first position).
                    bool correct = rank < k;

                    lock (gate)
                    {
                        if (correct)
                        {
                            recallsOpr[maxCand, k] += 1;
                            ndcgOpr[maxCand, k] += 1.0 / Math.Log(rank + 2, 2);
                            mrrOpr[maxCand, k] += 1.0 / (rank + 1);
                        }
                        if (_predictionWriter != null)
                        {
                            _predictionWriter[maxCand, k].WriteLine("{0},{1},{2},{3}",
                                u.Id, item.Item1, item.Item2, correct ? 1 : 0);
                        }
                    }
                }
            }
        }
    });

    // Aggregating measures and storing the results.
    foreach (int maxCand in NumCandidates)
    {
        foreach (int k in CutOffs)
        {
            recallsOpr[maxCand, k] /= testedCases;
            ndcgOpr[maxCand, k] /= testedCases;
            mrrOpr[maxCand, k] /= testedCases;

            var results = new Dictionary<string, string>();
            results.Add("TestedCases", testedCases.ToString());
            results.Add("CandidatesMode", CandidateItemsMode.ToString());
            if (CandidateItemsMode == CandidateItems.EXPLICIT)
            {
                results.Add("CandidatesFile", CandidateItemsFile.Substring(CandidateItemsFile.LastIndexOf('\\') + 1));
            }
            results.Add("NumCandidates", maxCand == int.MaxValue ? "max" : maxCand.ToString());
            results.Add("CutOff", k.ToString());
            results.Add("Recall", string.Format("{0:0.0000}", recallsOpr[maxCand, k]));
            results.Add("MRR", string.Format("{0:0.0000}", mrrOpr[maxCand, k]));
            results.Add("NDCG", string.Format("{0:0.0000}", ndcgOpr[maxCand, k]));
            results.Add("EvalMethod", GetEvaluatorName());

            context.AddResultsSet("rankingMeasures", results);

            if (_predictionWriter != null)
            {
                _predictionWriter[maxCand, k].Close();
            }
        }
    }
}