/// <summary>Compute the correlation between an entity given by a list of ratings and an entity from the rating data</summary>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">EntityType.USER to compare users over items; any other value compares items over users</param>
/// <param name="entity_ratings">(ID, rating) tuples describing the first entity</param>
/// <param name="j">the ID of the second entity</param>
/// <returns>0 if fewer than two ratings are shared, otherwise the value computed from the accumulated sums</returns>
public float ComputeCorrelation(IRatings ratings, EntityType entity_type, IList<Tuple<int, float>> entity_ratings, int j)
{
    bool by_user = entity_type == EntityType.USER;
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs rated both by the first entity and by entity j
    var shared_ids = new HashSet<int>(entity_ratings.Select(pair => pair.Item1));
    var j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    // lookup table for the first entity's ratings, restricted to the shared IDs
    var first_ratings = new Dictionary<int, float>();
    foreach (var pair in entity_ratings)
    {
        if (shared_ids.Contains(pair.Item1))
        {
            first_ratings.Add(pair.Item1, pair.Item2);
        }
    }

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // accumulate everything needed by the sum-based overload in one pass
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        float rating_i = first_ratings[shared_id];
        float rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    return ComputeCorrelation(sum_i, sum_j, sum_ii, sum_jj, sum_ij, n);
}
/// <summary>Compute the correlation between two entities from the ratings they have in common</summary>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">EntityType.USER to compare two users; any other value compares two items</param>
/// <param name="i">the ID of the first entity</param>
/// <param name="j">the ID of the second entity</param>
/// <returns>1 if both IDs are equal, 0 if fewer than two ratings are shared, otherwise the value computed from the accumulated sums</returns>
public float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j)
{
    if (i == j)
    {
        return 1;
    }

    bool by_user = entity_type == EntityType.USER;
    IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs on the opposite side (items for users, users for items) rated by both entities
    var shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
    var j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // accumulate everything needed by the sum-based overload in one pass
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        float rating_i = by_user
            ? ratings.Get(i, shared_id, i_indexes)
            : ratings.Get(shared_id, i, i_indexes);
        float rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    return ComputeCorrelation(sum_i, sum_j, sum_ii, sum_jj, sum_ij, n);
}
/// <summary>Performs user-wise fold-in evaluation, but instead of folding in performs incremental training with the new data</summary>
/// <remarks>
/// For every user that occurs in both data sets, the recommender is cloned, the clone receives the user's
/// ratings from update_data via AddRatings, and the clone is then asked to predict the user's items in eval_data.
/// If no rating is evaluated at all, the averages are computed as 0/0 and the reported measures are NaN.
/// </remarks>
/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
/// <param name='recommender'>the incremental rating predictor to evaluate</param>
/// <param name='update_data'>the rating data used to represent the users</param>
/// <param name='eval_data'>the evaluation data</param>
static public RatingPredictionEvaluationResults EvaluateFoldInIncrementalTraining(this IncrementalRatingPredictor recommender, IRatings update_data, IRatings eval_data)
{
    double rmse = 0;
    double mae = 0;
    double cbd = 0;
    int rating_count = 0;

    foreach (int user_id in update_data.AllUsers)
    {
        // only users that also have evaluation ratings can be scored
        if (!eval_data.AllUsers.Contains(user_id))
        {
            continue;
        }

        var local_recommender = (IncrementalRatingPredictor) recommender.Clone();

        // add ratings and perform incremental training
        var user_ratings = new RatingsProxy(update_data, update_data.ByUser[user_id]);
        local_recommender.AddRatings(user_ratings);

        var eval_indexes = eval_data.ByUser[user_id];
        var items_to_rate = (from index in eval_indexes select eval_data.Items[index]).ToArray();

        // predict with the clone that received the new ratings; asking the original
        // recommender here would leave the incremental training out of the evaluation
        var predicted_ratings = local_recommender.Recommend(user_id, candidate_items: items_to_rate);

        foreach (var pred in predicted_ratings)
        {
            float prediction = pred.Item2;
            float actual_rating = eval_data.Get(user_id, pred.Item1, eval_indexes);
            float error = prediction - actual_rating;

            rmse += error * error;
            mae += Math.Abs(error);
            cbd += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
            rating_count++;
        }

        // remove ratings again
        local_recommender.RemoveRatings(user_ratings);

        // progress indicator: one dot per evaluated user
        Console.Error.Write(".");
    }

    mae = mae / rating_count;
    rmse = Math.Sqrt(rmse / rating_count);
    cbd = cbd / rating_count;

    var result = new RatingPredictionEvaluationResults();
    result["RMSE"] = (float) rmse;
    result["MAE"] = (float) mae;
    // MAE normalized by the width of the rating scale
    result["NMAE"] = (float) mae / (recommender.MaxRating - recommender.MinRating);
    result["CBD"] = (float) cbd;
    return result;
}
/// <summary>Evaluates a fold-in rating predictor user by user</summary>
/// <remarks>
/// For each user that occurs in both data sets, the user's ratings from update_data are passed to the
/// recommender together with the user's items from eval_data; the returned scores are compared with
/// the actual ratings in eval_data.
/// </remarks>
/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
/// <param name='recommender'>a rating predictor capable of performing a user fold-in</param>
/// <param name='update_data'>the rating data used to represent the users</param>
/// <param name='eval_data'>the evaluation data</param>
static public RatingPredictionEvaluationResults EvaluateFoldIn(this IFoldInRatingPredictor recommender, IRatings update_data, IRatings eval_data)
{
    double squared_error_sum = 0;
    double absolute_error_sum = 0;
    double cbd_sum = 0;
    int rating_count = 0;

    foreach (int user_id in update_data.AllUsers)
    {
        // skip users without evaluation ratings
        if (!eval_data.AllUsers.Contains(user_id))
        {
            continue;
        }

        var known_ratings = update_data.ByUser[user_id]
            .Select(index => Tuple.Create(update_data.Items[index], update_data[index]))
            .ToArray();
        var items_to_rate = eval_data.ByUser[user_id]
            .Select(index => eval_data.Items[index])
            .ToArray();

        foreach (var pred in recommender.ScoreItems(known_ratings, items_to_rate))
        {
            float prediction = pred.Item2;
            float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
            float error = prediction - actual_rating;

            squared_error_sum += error * error;
            absolute_error_sum += Math.Abs(error);
            cbd_sum += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
            rating_count++;
        }

        // progress indicator: one dot per evaluated user
        Console.Error.Write(".");
    }

    double mae = absolute_error_sum / rating_count;
    double rmse = Math.Sqrt(squared_error_sum / rating_count);
    double cbd = cbd_sum / rating_count;

    var result = new RatingPredictionEvaluationResults();
    result["RMSE"] = (float) rmse;
    result["MAE"] = (float) mae;
    // MAE normalized by the width of the rating scale
    result["NMAE"] = (float) mae / (recommender.MaxRating - recommender.MinRating);
    result["CBD"] = (float) cbd;
    return result;
}
/// <summary>Evaluates a fold-in rating predictor user by user</summary>
/// <remarks>
/// Only users present in both update_data and eval_data are evaluated: their ratings from update_data
/// are handed to the recommender, which scores the items the user rated in eval_data.
/// </remarks>
/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
/// <param name='recommender'>a rating predictor capable of performing a user fold-in</param>
/// <param name='update_data'>the rating data used to represent the users</param>
/// <param name='eval_data'>the evaluation data</param>
static public RatingPredictionEvaluationResults EvaluateFoldIn(this IFoldInRatingPredictor recommender, IRatings update_data, IRatings eval_data)
{
    double squared_error_sum = 0;
    double absolute_error_sum = 0;
    double cbd_sum = 0;
    int rating_count = 0;

    foreach (int user_id in update_data.AllUsers)
    {
        // skip users without evaluation ratings
        if (!eval_data.AllUsers.Contains(user_id))
            continue;

        var known_ratings = update_data.ByUser[user_id]
            .Select(index => Tuple.Create(update_data.Items[index], update_data[index]))
            .ToArray();
        var items_to_rate = eval_data.ByUser[user_id]
            .Select(index => eval_data.Items[index])
            .ToArray();

        foreach (var pred in recommender.ScoreItems(known_ratings, items_to_rate))
        {
            float prediction = pred.Item2;
            float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
            float error = prediction - actual_rating;

            squared_error_sum += error * error;
            absolute_error_sum += Math.Abs(error);
            cbd_sum += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
            rating_count++;
        }

        // progress indicator: one dot per evaluated user
        Console.Error.Write(".");
    }

    double mae = absolute_error_sum / rating_count;
    double rmse = Math.Sqrt(squared_error_sum / rating_count);
    double cbd = cbd_sum / rating_count;

    var result = new RatingPredictionEvaluationResults();
    result["RMSE"] = (float) rmse;
    result["MAE"] = (float) mae;
    // MAE normalized by the width of the rating scale
    result["NMAE"] = (float) mae / (recommender.MaxRating - recommender.MinRating);
    result["CBD"] = (float) cbd;
    return result;
}
/// <summary>Compute the shrunk adjusted cosine between two entities for given ratings</summary>
/// <remarks>
/// Each shared rating is centered on the Average of the co-rated entity (taken from ratings.ByItem when
/// comparing users, from ratings.ByUser otherwise) before the cosine is computed; the result is then
/// multiplied by n / (n + shrinkage), where n is the number of shared ratings.
/// </remarks>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">the entity type, either USER or ITEM</param>
/// <param name="i">the ID of first entity</param>
/// <param name="j">the ID of second entity</param>
/// <param name="shrinkage">the shrinkage parameter</param>
/// <returns>1 if both IDs are equal, 0 if fewer than two ratings are shared or the denominator is 0, otherwise the shrunk adjusted cosine</returns>
public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
{
    if (i == j)
    {
        return 1;
    }

    bool by_user = entity_type == EntityType.USER;
    IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs on the opposite side (items for users, users for items) rated by both entities
    HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
    HashSet<int> j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    List<Ratings> ratings_by_other_entity = by_user ? ratings.ByItem : ratings.ByUser;

    double sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        double average_rating = ratings_by_other_entity[shared_id].Average;

        double rating_i = by_user
            ? ratings.Get(i, shared_id, i_indexes)
            : ratings.Get(shared_id, i, i_indexes);
        double rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        // deviations from the co-rated entity's average
        double dev_i = rating_i - average_rating;
        double dev_j = rating_j - average_rating;

        sum_ij += dev_i * dev_j;
        sum_ii += dev_i * dev_i;
        sum_jj += dev_j * dev_j;
    }

    double denominator = Math.Sqrt(sum_ii * sum_jj);
    if (denominator == 0)
    {
        return 0;
    }

    double adjusted_cosine = sum_ij / denominator;
    return (float) adjusted_cosine * (n / (n + shrinkage));
}
/// <summary>Compute the correlation between two entities from the ratings they have in common</summary>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">EntityType.USER to compare two users; any other value compares two items</param>
/// <param name="i">the ID of the first entity</param>
/// <param name="j">the ID of the second entity</param>
/// <returns>1 if both IDs are equal, 0 if fewer than two ratings are shared, otherwise the value computed from the accumulated sums</returns>
public float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j)
{
    if (i == j)
    {
        return 1;
    }

    bool by_user = entity_type == EntityType.USER;
    IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs on the opposite side (items for users, users for items) rated by both entities
    var shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
    var j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // accumulate everything needed by the sum-based overload in one pass
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        float rating_i = by_user
            ? ratings.Get(i, shared_id, i_indexes)
            : ratings.Get(shared_id, i, i_indexes);
        float rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    return ComputeCorrelation(sum_i, sum_j, sum_ii, sum_jj, sum_ij, n);
}
/// <summary>Compute the correlation between an entity given by a list of ratings and an entity from the rating data</summary>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">EntityType.USER to compare users over items; any other value compares items over users</param>
/// <param name="entity_ratings">(ID, rating) tuples describing the first entity</param>
/// <param name="j">the ID of the second entity</param>
/// <returns>0 if fewer than two ratings are shared, otherwise the value computed from the accumulated sums</returns>
public float ComputeCorrelation(IRatings ratings, EntityType entity_type, IList<Tuple<int, float>> entity_ratings, int j)
{
    bool by_user = entity_type == EntityType.USER;
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs rated both by the first entity and by entity j
    var shared_ids = new HashSet<int>(entity_ratings.Select(pair => pair.Item1));
    var j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    // lookup table for the first entity's ratings, restricted to the shared IDs
    var first_ratings = new Dictionary<int, float>();
    foreach (var pair in entity_ratings)
    {
        if (shared_ids.Contains(pair.Item1))
        {
            first_ratings.Add(pair.Item1, pair.Item2);
        }
    }

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // accumulate everything needed by the sum-based overload in one pass
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        float rating_i = first_ratings[shared_id];
        float rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    return ComputeCorrelation(sum_i, sum_j, sum_ii, sum_jj, sum_ij, n);
}
/// <summary>Performs user-wise fold-in evaluation, but instead of folding in performs incremental training with the new data</summary>
/// <remarks>
/// For every user that occurs in both data sets, the recommender is cloned, the clone receives the user's
/// ratings from update_data via AddRatings, and the clone is then asked to predict the user's items in eval_data.
/// If no rating is evaluated at all, the averages are computed as 0/0 and the reported measures are NaN.
/// </remarks>
/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
/// <param name='recommender'>the incremental rating predictor to evaluate</param>
/// <param name='update_data'>the rating data used to represent the users</param>
/// <param name='eval_data'>the evaluation data</param>
static public RatingPredictionEvaluationResults EvaluateFoldInIncrementalTraining(this IncrementalRatingPredictor recommender, IRatings update_data, IRatings eval_data)
{
    double rmse = 0;
    double mae = 0;
    double cbd = 0;
    int rating_count = 0;

    foreach (int user_id in update_data.AllUsers)
    {
        // only users that also have evaluation ratings can be scored
        if (!eval_data.AllUsers.Contains(user_id))
            continue;

        var local_recommender = (IncrementalRatingPredictor) recommender.Clone();

        // add ratings and perform incremental training
        var user_ratings = new RatingsProxy(update_data, update_data.ByUser[user_id]);
        local_recommender.AddRatings(user_ratings);

        var eval_indexes = eval_data.ByUser[user_id];
        var items_to_rate = (from index in eval_indexes select eval_data.Items[index]).ToArray();

        // predict with the clone that received the new ratings; asking the original
        // recommender here would leave the incremental training out of the evaluation
        var predicted_ratings = local_recommender.Recommend(user_id, candidate_items: items_to_rate);

        foreach (var pred in predicted_ratings)
        {
            float prediction = pred.Item2;
            float actual_rating = eval_data.Get(user_id, pred.Item1, eval_indexes);
            float error = prediction - actual_rating;

            rmse += error * error;
            mae += Math.Abs(error);
            cbd += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
            rating_count++;
        }

        // remove ratings again
        local_recommender.RemoveRatings(user_ratings);

        // progress indicator: one dot per evaluated user
        Console.Error.Write(".");
    }

    mae = mae / rating_count;
    rmse = Math.Sqrt(rmse / rating_count);
    cbd = cbd / rating_count;

    var result = new RatingPredictionEvaluationResults();
    result["RMSE"] = (float) rmse;
    result["MAE"] = (float) mae;
    // MAE normalized by the width of the rating scale
    result["NMAE"] = (float) mae / (recommender.MaxRating - recommender.MinRating);
    result["CBD"] = (float) cbd;
    return result;
}
/// <summary>Compute the shrunk adjusted cosine between two entities for given ratings</summary>
/// <remarks>
/// Each shared rating is centered on the Average of the co-rated entity (taken from ratings.ByItem when
/// comparing users, from ratings.ByUser otherwise) before the cosine is computed; the result is then
/// multiplied by n / (n + shrinkage), where n is the number of shared ratings.
/// </remarks>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">the entity type, either USER or ITEM</param>
/// <param name="i">the ID of first entity</param>
/// <param name="j">the ID of second entity</param>
/// <param name="shrinkage">the shrinkage parameter</param>
/// <returns>1 if both IDs are equal, 0 if fewer than two ratings are shared or the denominator is 0, otherwise the shrunk adjusted cosine</returns>
public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
{
    if (i == j)
    {
        return 1;
    }

    bool by_user = entity_type == EntityType.USER;
    IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs on the opposite side (items for users, users for items) rated by both entities
    HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
    HashSet<int> j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    List<Ratings> ratings_by_other_entity = by_user ? ratings.ByItem : ratings.ByUser;

    double sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        double average_rating = ratings_by_other_entity[shared_id].Average;

        double rating_i = by_user
            ? ratings.Get(i, shared_id, i_indexes)
            : ratings.Get(shared_id, i, i_indexes);
        double rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        // deviations from the co-rated entity's average
        double dev_i = rating_i - average_rating;
        double dev_j = rating_j - average_rating;

        sum_ij += dev_i * dev_j;
        sum_ii += dev_i * dev_i;
        sum_jj += dev_j * dev_j;
    }

    double denominator = Math.Sqrt(sum_ii * sum_jj);
    if (denominator == 0)
    {
        return 0;
    }

    double adjusted_cosine = sum_ij / denominator;
    return (float) adjusted_cosine * (n / (n + shrinkage));
}
/// <summary>Compute the shrunk Pearson correlation between two entities for given ratings</summary>
/// <remarks>
/// The correlation is computed in a single pass over the ratings both entities have in common and
/// then multiplied by n / (n + shrinkage), where n is the number of shared ratings.
/// </remarks>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">the entity type, either USER or ITEM</param>
/// <param name="i">the ID of first entity</param>
/// <param name="j">the ID of second entity</param>
/// <param name="shrinkage">the shrinkage parameter</param>
/// <returns>1 if both IDs are equal, 0 if fewer than two ratings are shared or the denominator is 0, otherwise the shrunk correlation</returns>
public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
{
    if (i == j)
    {
        return 1;
    }

    bool by_user = entity_type == EntityType.USER;
    IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs on the opposite side (items for users, users for items) rated by both entities
    HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
    HashSet<int> j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // single pass: collect the plain, squared and mixed sums
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        double rating_i = by_user
            ? ratings.Get(i, shared_id, i_indexes)
            : ratings.Get(shared_id, i, i_indexes);
        double rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    double spread_i = n * sum_ii - sum_i * sum_i;
    double spread_j = n * sum_jj - sum_j * sum_j;
    double denominator = Math.Sqrt(spread_i * spread_j);
    if (denominator == 0)
    {
        return 0;
    }

    double pmcc = (n * sum_ij - sum_i * sum_j) / denominator;
    return (float) pmcc * (n / (n + shrinkage));
}
/// <summary>Compute the shrunk Pearson correlation between two entities for given ratings</summary>
/// <remarks>
/// The correlation is computed in a single pass over the ratings both entities have in common and
/// then multiplied by n / (n + shrinkage), where n is the number of shared ratings.
/// </remarks>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">the entity type, either USER or ITEM</param>
/// <param name="i">the ID of first entity</param>
/// <param name="j">the ID of second entity</param>
/// <param name="shrinkage">the shrinkage parameter</param>
/// <returns>1 if both IDs are equal, 0 if fewer than two ratings are shared or the denominator is 0, otherwise the shrunk correlation</returns>
public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
{
    if (i == j)
    {
        return 1;
    }

    bool by_user = entity_type == EntityType.USER;
    IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs on the opposite side (items for users, users for items) rated by both entities
    HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
    HashSet<int> j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // single pass: collect the plain, squared and mixed sums
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        double rating_i = by_user
            ? ratings.Get(i, shared_id, i_indexes)
            : ratings.Get(shared_id, i, i_indexes);
        double rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    double spread_i = n * sum_ii - sum_i * sum_i;
    double spread_j = n * sum_jj - sum_j * sum_j;
    double denominator = Math.Sqrt(spread_i * spread_j);
    if (denominator == 0)
    {
        return 0;
    }

    double pmcc = (n * sum_ij - sum_i * sum_j) / denominator;
    return (float) pmcc * (n / (n + shrinkage));
}
/// <summary>Compute the shrunk Pearson correlation between an entity given by a list of ratings and an entity from the rating data</summary>
/// <remarks>
/// The correlation is computed in a single pass over the shared ratings and then multiplied by
/// (n - 1) / (n - 1 + shrinkage), where n is the number of shared ratings.
/// </remarks>
/// <param name="ratings">the rating data</param>
/// <param name="entity_type">the entity type, either USER or ITEM</param>
/// <param name="entity_ratings">ratings identifying the first entity</param>
/// <param name="j">the ID of second entity</param>
/// <param name="shrinkage">the shrinkage parameter, set to 0 for the standard Pearson correlation without shrinkage</param>
/// <returns>0 if fewer than two ratings are shared or the denominator is 0, otherwise the shrunk correlation</returns>
public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, IList<Pair<int, float>> entity_ratings, int j, float shrinkage)
{
    bool by_user = entity_type == EntityType.USER;
    IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

    // IDs rated both by the first entity and by entity j
    var shared_ids = new HashSet<int>(entity_ratings.Select(pair => pair.First));
    var j_ids = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
    shared_ids.IntersectWith(j_ids);

    // lookup table for the first entity's ratings, restricted to the shared IDs
    var first_ratings = new Dictionary<int, float>();
    foreach (var pair in entity_ratings)
    {
        if (shared_ids.Contains(pair.First))
        {
            first_ratings.Add(pair.First, pair.Second);
        }
    }

    int n = shared_ids.Count;
    if (n < 2)
    {
        return 0;
    }

    // single pass: collect the plain, squared and mixed sums
    double sum_i = 0, sum_j = 0, sum_ij = 0, sum_ii = 0, sum_jj = 0;
    foreach (int shared_id in shared_ids)
    {
        float rating_i = first_ratings[shared_id];
        float rating_j = by_user
            ? ratings.Get(j, shared_id, j_indexes)
            : ratings.Get(shared_id, j, j_indexes);

        sum_i += rating_i;
        sum_j += rating_j;
        sum_ij += rating_i * rating_j;
        sum_ii += rating_i * rating_i;
        sum_jj += rating_j * rating_j;
    }

    double spread_i = n * sum_ii - sum_i * sum_i;
    double spread_j = n * sum_jj - sum_j * sum_j;
    double denominator = Math.Sqrt(spread_i * spread_j);
    if (denominator == 0)
    {
        return 0;
    }

    double pmcc = (n * sum_ij - sum_i * sum_j) / denominator;
    return (float) pmcc * ((n - 1) / (n - 1 + shrinkage));
}