/// <summary>Evaluation for rankings of items</summary>
/// <remarks>
/// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation.
/// The evaluation measures are listed in the ItemPredictionMeasures property.
/// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
/// and the number of items that were taken into account.
///
/// Literature:
/// C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>a dictionary containing the evaluation results</returns>
/// <exception cref="InvalidOperationException">if the recommender did not rank all relevant items</exception>
public static Dictionary<string, double> Evaluate(
	IItemRecommender recommender,
	IPosOnlyFeedback test,
	IPosOnlyFeedback train,
	ICollection<int> relevant_users,
	ICollection<int> relevant_items)
{
	if (train.Overlap(test) > 0)
		Console.Error.WriteLine("WARNING: Overlapping train and test data");

	// accumulators for the per-user evaluation measures
	double auc_sum     = 0;
	double map_sum     = 0;
	double prec_5_sum  = 0;
	double prec_10_sum = 0;
	double prec_15_sum = 0;
	double ndcg_sum    = 0;
	int num_users      = 0;

	foreach (int user_id in relevant_users)
	{
		// relevant items that occur in the test set for this user
		var correct_items = new HashSet<int>(test.UserMatrix[user_id]);
		correct_items.IntersectWith(relevant_items);

		// relevant items already known from training do not count as evaluation candidates
		var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
		relevant_items_in_train.IntersectWith(relevant_items);
		// Count property, not LINQ Count() — O(1) instead of an extension-method call
		int num_eval_items = relevant_items.Count - relevant_items_in_train.Count;

		// skip users for which the measures are undefined:
		// no test items at all, or every candidate item is a test item
		if (correct_items.Count == 0)
			continue;
		if (num_eval_items - correct_items.Count == 0)
			continue;

		num_users++;
		int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_items);
		// sanity check BEFORE accumulating measures computed from an incomplete ranking
		if (prediction.Length != relevant_items.Count)
			throw new InvalidOperationException("Not all items have been ranked.");

		auc_sum     += AUC(prediction, correct_items, train.UserMatrix[user_id]);
		map_sum     += MAP(prediction, correct_items, train.UserMatrix[user_id]);
		ndcg_sum    += NDCG(prediction, correct_items, train.UserMatrix[user_id]);
		prec_5_sum  += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
		prec_10_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
		prec_15_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

		// progress indicator on stderr
		if (num_users % 1000 == 0)
			Console.Error.Write(".");
		if (num_users % 20000 == 0)
			Console.Error.WriteLine();
	}

	// NOTE(review): if no user qualifies, num_users is 0 and the averages below are NaN,
	// which mirrors the previous behavior.
	var result = new Dictionary<string, double>();
	result["AUC"]       = auc_sum / num_users;
	result["MAP"]       = map_sum / num_users;
	result["NDCG"]      = ndcg_sum / num_users;
	result["prec@5"]    = prec_5_sum / num_users;
	result["prec@10"]   = prec_10_sum / num_users;
	result["prec@15"]   = prec_15_sum / num_users;
	result["num_users"] = num_users;
	result["num_lists"] = num_users;
	result["num_items"] = relevant_items.Count;
	return result;
}
/// <summary>Evaluation for rankings of filtered items</summary>
/// <remarks>
/// Each (user, attribute) pair for which the user has test items carrying that attribute
/// yields one evaluation list; measures are averaged over lists, not over users.
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="item_attributes">the item attributes to be used for filtering</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>a dictionary containing the evaluation results</returns>
/// <exception cref="InvalidOperationException">if the recommender did not rank all filtered items</exception>
public static Dictionary<string, double> Evaluate(
	IItemRecommender recommender,
	IPosOnlyFeedback test,
	IPosOnlyFeedback train,
	SparseBooleanMatrix item_attributes,
	ICollection<int> relevant_users,
	ICollection<int> relevant_items)
{
	if (train.Overlap(test) > 0)
		Console.Error.WriteLine("WARNING: Overlapping train and test data");

	// attribute -> items mapping, for building per-attribute candidate sets
	SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

	// accumulators for the per-list evaluation measures
	double auc_sum     = 0;
	double map_sum     = 0;
	double prec_5_sum  = 0;
	double prec_10_sum = 0;
	double prec_15_sum = 0;
	double ndcg_sum    = 0;

	// for counting the users and the evaluation lists
	int num_lists    = 0;
	int num_users    = 0;
	int last_user_id = -1;

	foreach (int user_id in relevant_users)
	{
		var filtered_items = GetFilteredItems(user_id, test, item_attributes);

		foreach (int attribute_id in filtered_items.Keys)
		{
			// TODO optimize this a bit, currently it is quite naive
			// candidate items: relevant items that carry this attribute
			var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
			relevant_filtered_items.IntersectWith(relevant_items);

			// candidate items that occur in the test set for this user
			var correct_items = new HashSet<int>(filtered_items[attribute_id]);
			correct_items.IntersectWith(relevant_filtered_items);

			// candidate items already known from training do not count as evaluation candidates
			var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
			relevant_items_in_train.IntersectWith(relevant_filtered_items);
			// Count property, not LINQ Count() — O(1) instead of an extension-method call
			int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count;

			// skip lists for which the measures are undefined:
			// no test items at all, or every candidate item is a test item
			if (correct_items.Count == 0)
				continue;
			if (num_eval_items - correct_items.Count == 0)
				continue;

			// counting stats
			num_lists++;
			if (last_user_id != user_id)
			{
				last_user_id = user_id;
				num_users++;
			}

			// evaluation
			int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);
			// sanity check BEFORE accumulating measures computed from an incomplete ranking
			if (prediction.Length != relevant_filtered_items.Count)
				throw new InvalidOperationException("Not all items have been ranked.");

			auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
			map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
			ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
			prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
			prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
			prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

			// progress indicator on stderr
			if (num_lists % 1000 == 0)
				Console.Error.Write(".");
			if (num_lists % 20000 == 0)
				Console.Error.WriteLine();
		}
	}

	// NOTE(review): if no list qualifies, num_lists is 0 and the averages below are NaN,
	// which mirrors the previous behavior.
	var result = new Dictionary<string, double>();
	result.Add("AUC",       auc_sum / num_lists);
	result.Add("MAP",       map_sum / num_lists);
	result.Add("NDCG",      ndcg_sum / num_lists);
	result.Add("prec@5",    prec_5_sum / num_lists);
	result.Add("prec@10",   prec_10_sum / num_lists);
	result.Add("prec@15",   prec_15_sum / num_lists);
	result.Add("num_users", num_users);
	result.Add("num_lists", num_lists);
	result.Add("num_items", relevant_items.Count);
	return result;
}
/// <summary>Evaluation for rankings of items</summary>
/// <remarks>
/// User-item combinations that appear in both sets are ignored for the test set, and thus in the evaluation.
/// The evaluation measures are listed in the ItemPredictionMeasures property.
/// Additionally, 'num_users' and 'num_items' report the number of users that were used to compute the results
/// and the number of items that were taken into account.
///
/// Literature:
/// C. Manning, P. Raghavan, H. Schütze: Introduction to Information Retrieval, Cambridge University Press, 2008
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>a dictionary containing the evaluation results</returns>
/// <exception cref="InvalidOperationException">if the recommender did not rank all relevant items</exception>
public static Dictionary<string, double> Evaluate(
	IItemRecommender recommender,
	IPosOnlyFeedback test,
	IPosOnlyFeedback train,
	ICollection<int> relevant_users,
	ICollection<int> relevant_items)
{
	if (train.Overlap(test) > 0)
		Console.Error.WriteLine("WARNING: Overlapping train and test data");

	// accumulators for the per-user evaluation measures
	double auc_sum     = 0;
	double map_sum     = 0;
	double prec_5_sum  = 0;
	double prec_10_sum = 0;
	double prec_15_sum = 0;
	double ndcg_sum    = 0;
	int num_users      = 0;

	foreach (int user_id in relevant_users)
	{
		// relevant items that occur in the test set for this user
		var correct_items = new HashSet<int>(test.UserMatrix[user_id]);
		correct_items.IntersectWith(relevant_items);

		// relevant items already known from training do not count as evaluation candidates
		var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
		relevant_items_in_train.IntersectWith(relevant_items);
		// Count property, not LINQ Count() — O(1) instead of an extension-method call
		int num_eval_items = relevant_items.Count - relevant_items_in_train.Count;

		// skip users for which the measures are undefined:
		// no test items at all, or every candidate item is a test item
		if (correct_items.Count == 0)
			continue;
		if (num_eval_items - correct_items.Count == 0)
			continue;

		num_users++;
		int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_items);
		// sanity check BEFORE accumulating measures computed from an incomplete ranking
		if (prediction.Length != relevant_items.Count)
			throw new InvalidOperationException("Not all items have been ranked.");

		auc_sum     += AUC(prediction, correct_items, train.UserMatrix[user_id]);
		map_sum     += MAP(prediction, correct_items, train.UserMatrix[user_id]);
		ndcg_sum    += NDCG(prediction, correct_items, train.UserMatrix[user_id]);
		prec_5_sum  += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
		prec_10_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
		prec_15_sum += PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

		// progress indicator on stderr
		if (num_users % 1000 == 0)
			Console.Error.Write(".");
		if (num_users % 20000 == 0)
			Console.Error.WriteLine();
	}

	// NOTE(review): if no user qualifies, num_users is 0 and the averages below are NaN,
	// which mirrors the previous behavior.
	var result = new Dictionary<string, double>();
	result["AUC"]       = auc_sum / num_users;
	result["MAP"]       = map_sum / num_users;
	result["NDCG"]      = ndcg_sum / num_users;
	result["prec@5"]    = prec_5_sum / num_users;
	result["prec@10"]   = prec_10_sum / num_users;
	result["prec@15"]   = prec_15_sum / num_users;
	result["num_users"] = num_users;
	result["num_lists"] = num_users;
	result["num_items"] = relevant_items.Count;
	return result;
}
/// <summary>Evaluation for rankings of filtered items</summary>
/// <remarks>
/// Each (user, attribute) pair for which the user has test items carrying that attribute
/// yields one evaluation list; measures are averaged over lists, not over users.
/// </remarks>
/// <param name="recommender">item recommender</param>
/// <param name="test">test cases</param>
/// <param name="train">training data</param>
/// <param name="item_attributes">the item attributes to be used for filtering</param>
/// <param name="relevant_users">a collection of integers with all relevant users</param>
/// <param name="relevant_items">a collection of integers with all relevant items</param>
/// <returns>a dictionary containing the evaluation results</returns>
/// <exception cref="InvalidOperationException">if the recommender did not rank all filtered items</exception>
public static Dictionary<string, double> Evaluate(
	IItemRecommender recommender,
	IPosOnlyFeedback test,
	IPosOnlyFeedback train,
	SparseBooleanMatrix item_attributes,
	ICollection<int> relevant_users,
	ICollection<int> relevant_items)
{
	if (train.Overlap(test) > 0)
		Console.Error.WriteLine("WARNING: Overlapping train and test data");

	// attribute -> items mapping, for building per-attribute candidate sets
	SparseBooleanMatrix items_by_attribute = (SparseBooleanMatrix) item_attributes.Transpose();

	// accumulators for the per-list evaluation measures
	double auc_sum     = 0;
	double map_sum     = 0;
	double prec_5_sum  = 0;
	double prec_10_sum = 0;
	double prec_15_sum = 0;
	double ndcg_sum    = 0;

	// for counting the users and the evaluation lists
	int num_lists    = 0;
	int num_users    = 0;
	int last_user_id = -1;

	foreach (int user_id in relevant_users)
	{
		var filtered_items = GetFilteredItems(user_id, test, item_attributes);

		foreach (int attribute_id in filtered_items.Keys)
		{
			// TODO optimize this a bit, currently it is quite naive
			// candidate items: relevant items that carry this attribute
			var relevant_filtered_items = new HashSet<int>(items_by_attribute[attribute_id]);
			relevant_filtered_items.IntersectWith(relevant_items);

			// candidate items that occur in the test set for this user
			var correct_items = new HashSet<int>(filtered_items[attribute_id]);
			correct_items.IntersectWith(relevant_filtered_items);

			// candidate items already known from training do not count as evaluation candidates
			var relevant_items_in_train = new HashSet<int>(train.UserMatrix[user_id]);
			relevant_items_in_train.IntersectWith(relevant_filtered_items);
			// Count property, not LINQ Count() — O(1) instead of an extension-method call
			int num_eval_items = relevant_filtered_items.Count - relevant_items_in_train.Count;

			// skip lists for which the measures are undefined:
			// no test items at all, or every candidate item is a test item
			if (correct_items.Count == 0)
				continue;
			if (num_eval_items - correct_items.Count == 0)
				continue;

			// counting stats
			num_lists++;
			if (last_user_id != user_id)
			{
				last_user_id = user_id;
				num_users++;
			}

			// evaluation
			int[] prediction = Prediction.PredictItems(recommender, user_id, relevant_filtered_items);
			// sanity check BEFORE accumulating measures computed from an incomplete ranking
			if (prediction.Length != relevant_filtered_items.Count)
				throw new InvalidOperationException("Not all items have been ranked.");

			auc_sum     += Items.AUC(prediction, correct_items, train.UserMatrix[user_id]);
			map_sum     += Items.MAP(prediction, correct_items, train.UserMatrix[user_id]);
			ndcg_sum    += Items.NDCG(prediction, correct_items, train.UserMatrix[user_id]);
			prec_5_sum  += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id],  5);
			prec_10_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 10);
			prec_15_sum += Items.PrecisionAt(prediction, correct_items, train.UserMatrix[user_id], 15);

			// progress indicator on stderr
			if (num_lists % 1000 == 0)
				Console.Error.Write(".");
			if (num_lists % 20000 == 0)
				Console.Error.WriteLine();
		}
	}

	// NOTE(review): if no list qualifies, num_lists is 0 and the averages below are NaN,
	// which mirrors the previous behavior.
	var result = new Dictionary<string, double>();
	result.Add("AUC",       auc_sum / num_lists);
	result.Add("MAP",       map_sum / num_lists);
	result.Add("NDCG",      ndcg_sum / num_lists);
	result.Add("prec@5",    prec_5_sum / num_lists);
	result.Add("prec@10",   prec_10_sum / num_lists);
	result.Add("prec@15",   prec_15_sum / num_lists);
	result.Add("num_users", num_users);
	result.Add("num_lists", num_lists);
	result.Add("num_items", relevant_items.Count);
	return result;
}