Exemple #1
0
        /// <summary>Computes the correlation between an entity given by explicit ratings and a stored entity</summary>
        /// <remarks>
        /// Only IDs present in both rating sets are used. If fewer than two IDs are shared, the result is 0.
        /// The accumulated sums are passed on to the sum-based ComputeCorrelation overload.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="entity_ratings">(ID, rating) pairs describing the first entity</param>
        /// <param name="j">the ID of the second entity</param>
        /// <returns>the correlation computed from the shared ratings</returns>
        public float ComputeCorrelation(IRatings ratings, EntityType entity_type, IList <Tuple <int, float> > entity_ratings, int j)
        {
            bool by_user = entity_type == EntityType.USER;
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            var shared_ids = new HashSet<int>(entity_ratings.Select(pair => pair.Item1));
            shared_ids.IntersectWith(by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes));

            // look-up table: shared ID -> rating of the first entity
            var first_entity_ratings = new Dictionary<int, float>();
            foreach (var pair in entity_ratings)
                if (shared_ids.Contains(pair.Item1))
                    first_entity_ratings.Add(pair.Item1, pair.Item2);

            int n = shared_ids.Count;
            if (n < 2)
                return 0;

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                float r1 = first_entity_ratings[shared_id];
                float r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            return ComputeCorrelation(i_sum, j_sum, ii_sum, jj_sum, ij_sum, n);
        }
Exemple #2
0
        /// <summary>Computes the correlation between two stored entities</summary>
        /// <remarks>
        /// Returns 1 when both IDs are equal and 0 when the entities share fewer than two rated IDs.
        /// Otherwise the accumulated sums are passed on to the sum-based ComputeCorrelation overload.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="i">the ID of the first entity</param>
        /// <param name="j">the ID of the second entity</param>
        /// <returns>the correlation computed from the shared ratings</returns>
        public float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j)
        {
            if (i == j)
                return 1;

            bool by_user = entity_type == EntityType.USER;
            IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            var shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
            shared_ids.IntersectWith(by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes));

            int n = shared_ids.Count;
            if (n < 2)
                return 0;

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                float r1 = by_user
                    ? ratings.Get(i, shared_id, i_indexes)
                    : ratings.Get(shared_id, i, i_indexes);
                float r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            return ComputeCorrelation(i_sum, j_sum, ii_sum, jj_sum, ij_sum, n);
        }
        /// <summary>Performs user-wise fold-in evaluation, but instead of folding in perform incremental training with the new data</summary>
        /// <remarks>
        /// For every user that occurs in both data sets, the recommender is cloned, the clone is trained
        /// incrementally with the user's ratings from update_data, and the clone's predictions for the
        /// user's items in eval_data are compared with the actual ratings.
        /// If no rating is evaluated, the averaged measures are NaN (division of 0.0 by 0).
        /// </remarks>
        /// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
        /// <param name='recommender'>the incremental rating predictor to evaluate; only clones of it are updated</param>
        /// <param name='update_data'>the rating data used to represent the users</param>
        /// <param name='eval_data'>the evaluation data</param>
        static public RatingPredictionEvaluationResults EvaluateFoldInIncrementalTraining(this IncrementalRatingPredictor recommender, IRatings update_data, IRatings eval_data)
        {
            double rmse = 0;
            double mae  = 0;
            double cbd  = 0;

            int rating_count = 0;

            foreach (int user_id in update_data.AllUsers)
            {
                if (eval_data.AllUsers.Contains(user_id))
                {
                    var local_recommender = (IncrementalRatingPredictor)recommender.Clone();

                    // add ratings and perform incremental training
                    var user_ratings = new RatingsProxy(update_data, update_data.ByUser[user_id]);
                    local_recommender.AddRatings(user_ratings);

                    var items_to_rate = (from index in eval_data.ByUser[user_id] select eval_data.Items[index]).ToArray();

                    // predict with the incrementally trained clone; querying the original
                    // recommender here would ignore the ratings that were just added
                    var predicted_ratings = local_recommender.Recommend(user_id, candidate_items: items_to_rate);

                    foreach (var pred in predicted_ratings)
                    {
                        float prediction    = pred.Item2;
                        float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
                        float error         = prediction - actual_rating;

                        rmse += error * error;
                        mae  += Math.Abs(error);
                        cbd  += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
                        rating_count++;
                    }

                    // remove ratings again
                    local_recommender.RemoveRatings(user_ratings);

                    // progress indicator: one dot per evaluated user
                    Console.Error.Write(".");
                }
            }

            // turn the accumulated error sums into averages
            mae  = mae / rating_count;
            rmse = Math.Sqrt(rmse / rating_count);
            cbd  = cbd / rating_count;

            var result = new RatingPredictionEvaluationResults();

            result["RMSE"] = (float)rmse;
            result["MAE"]  = (float)mae;
            // MAE normalized by the width of the rating scale
            result["NMAE"] = (float)mae / (recommender.MaxRating - recommender.MinRating);
            result["CBD"]  = (float)cbd;
            return(result);
        }
        /// <summary>Evaluates a fold-in rating predictor user by user</summary>
        /// <remarks>
        /// For each user found in both data sets, the user's ratings in update_data are passed to
        /// ScoreItems together with the user's items in eval_data, and the returned scores are
        /// compared with the actual ratings in eval_data.
        /// </remarks>
        /// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
        /// <param name='recommender'>a rating predictor capable of performing a user fold-in</param>
        /// <param name='update_data'>the rating data used to represent the users</param>
        /// <param name='eval_data'>the evaluation data</param>
        static public RatingPredictionEvaluationResults EvaluateFoldIn(this IFoldInRatingPredictor recommender, IRatings update_data, IRatings eval_data)
        {
            double squared_error_sum  = 0;
            double absolute_error_sum = 0;
            double cbd_sum            = 0;
            int rating_count          = 0;

            foreach (int user_id in update_data.AllUsers)
            {
                // only users that also occur in the evaluation data can be evaluated
                if (!eval_data.AllUsers.Contains(user_id))
                    continue;

                var known_ratings = update_data.ByUser[user_id]
                    .Select(index => Tuple.Create(update_data.Items[index], update_data[index]))
                    .ToArray();
                var items_to_rate = eval_data.ByUser[user_id]
                    .Select(index => eval_data.Items[index])
                    .ToArray();

                foreach (var pred in recommender.ScoreItems(known_ratings, items_to_rate))
                {
                    float prediction    = pred.Item2;
                    float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
                    float error         = prediction - actual_rating;

                    squared_error_sum  += error * error;
                    absolute_error_sum += Math.Abs(error);
                    cbd_sum            += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
                    rating_count++;
                }

                // progress indicator: one dot per evaluated user
                Console.Error.Write(".");
            }

            // turn the accumulated sums into averages
            double mae  = absolute_error_sum / rating_count;
            double rmse = Math.Sqrt(squared_error_sum / rating_count);
            double cbd  = cbd_sum / rating_count;

            var result = new RatingPredictionEvaluationResults();
            result["RMSE"] = (float) rmse;
            result["MAE"]  = (float) mae;
            result["NMAE"] = (float) mae / (recommender.MaxRating - recommender.MinRating);
            result["CBD"]  = (float) cbd;
            return result;
        }
Exemple #5
0
		/// <summary>Evaluates a fold-in rating predictor user by user</summary>
		/// <remarks>
		/// For each user found in both data sets, the user's ratings in update_data are passed to
		/// ScoreItems together with the user's items in eval_data, and the returned scores are
		/// compared with the actual ratings in eval_data.
		/// </remarks>
		/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
		/// <param name='recommender'>a rating predictor capable of performing a user fold-in</param>
		/// <param name='update_data'>the rating data used to represent the users</param>
		/// <param name='eval_data'>the evaluation data</param>
		static public RatingPredictionEvaluationResults EvaluateFoldIn(this IFoldInRatingPredictor recommender, IRatings update_data, IRatings eval_data)
		{
			double squared_error_sum  = 0;
			double absolute_error_sum = 0;
			double cbd_sum            = 0;
			int rating_count          = 0;

			foreach (int user_id in update_data.AllUsers)
			{
				// only users that also occur in the evaluation data can be evaluated
				if (!eval_data.AllUsers.Contains(user_id))
				{
					continue;
				}

				var known_ratings = update_data.ByUser[user_id]
					.Select(index => Tuple.Create(update_data.Items[index], update_data[index]))
					.ToArray();
				var items_to_rate = eval_data.ByUser[user_id]
					.Select(index => eval_data.Items[index])
					.ToArray();

				foreach (var pred in recommender.ScoreItems(known_ratings, items_to_rate))
				{
					float prediction    = pred.Item2;
					float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
					float error         = prediction - actual_rating;

					squared_error_sum  += error * error;
					absolute_error_sum += Math.Abs(error);
					cbd_sum            += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
					rating_count++;
				}

				// progress indicator: one dot per evaluated user
				Console.Error.Write(".");
			}

			// turn the accumulated sums into averages
			double mae  = absolute_error_sum / rating_count;
			double rmse = Math.Sqrt(squared_error_sum / rating_count);
			double cbd  = cbd_sum / rating_count;

			var result = new RatingPredictionEvaluationResults();
			result["RMSE"] = (float)rmse;
			result["MAE"]  = (float)mae;
			result["NMAE"] = (float)mae / (recommender.MaxRating - recommender.MinRating);
			result["CBD"]  = (float)cbd;
			return(result);
		}
        /// <summary>Computes a shrunk cosine between two entities over rating deviations from the co-rated entities' averages</summary>
        /// <remarks>
        /// Each shared rating is centered by the average rating of the other-side entity it belongs to.
        /// Returns 1 if both IDs are equal, and 0 if fewer than two IDs are shared or the denominator is 0.
        /// The cosine is scaled by n / (n + shrinkage), where n is the number of shared IDs.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="i">the ID of first entity</param>
        /// <param name="j">the ID of second entity</param>
        /// <param name="shrinkage">the shrinkage parameter</param>
        /// <returns>the shrunk adjusted cosine value</returns>
        public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
        {
            if (i == j)
                return 1;

            bool by_user = entity_type == EntityType.USER;
            IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
            HashSet<int> j_ids      = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
            shared_ids.IntersectWith(j_ids);

            int n = shared_ids.Count;
            if (n < 2)
                return 0;

            // ratings grouped by the opposite entity type, used for the averages
            List<Ratings> ratings_by_other_entity = by_user ? ratings.ByItem : ratings.ByUser;

            double sum_ij = 0, sum_ii = 0, sum_jj = 0;
            foreach (int shared_id in shared_ids)
            {
                double average_rating = ratings_by_other_entity[shared_id].Average;

                double r1 = by_user
                    ? ratings.Get(i, shared_id, i_indexes)
                    : ratings.Get(shared_id, i, i_indexes);
                double r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                // deviations from the average of the co-rated entity
                double dev_i = r1 - average_rating;
                double dev_j = r2 - average_rating;

                sum_ij += dev_i * dev_j;
                sum_ii += dev_i * dev_i;
                sum_jj += dev_j * dev_j;
            }

            double denominator = Math.Sqrt(sum_ii * sum_jj);
            if (denominator == 0)
                return 0;

            double adjusted_cosine = sum_ij / denominator;
            return (float) adjusted_cosine * (n / (n + shrinkage));
        }
Exemple #7
0
        /// <summary>Computes the correlation between two stored entities</summary>
        /// <remarks>
        /// Returns 1 when both IDs are equal and 0 when the entities share fewer than two rated IDs.
        /// Otherwise the accumulated sums are passed on to the sum-based ComputeCorrelation overload.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="i">the ID of the first entity</param>
        /// <param name="j">the ID of the second entity</param>
        /// <returns>the correlation computed from the shared ratings</returns>
        public float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j)
        {
            if (i == j)
            {
                return 1;
            }

            bool by_user = entity_type == EntityType.USER;
            IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            var shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
            shared_ids.IntersectWith(by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes));

            int n = shared_ids.Count;
            if (n < 2)
            {
                return 0;
            }

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                float r1 = by_user
                    ? ratings.Get(i, shared_id, i_indexes)
                    : ratings.Get(shared_id, i, i_indexes);
                float r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            return ComputeCorrelation(i_sum, j_sum, ii_sum, jj_sum, ij_sum, n);
        }
Exemple #8
0
        /// <summary>Computes the correlation between an entity given by explicit ratings and a stored entity</summary>
        /// <remarks>
        /// Only IDs present in both rating sets are used. If fewer than two IDs are shared, the result is 0.
        /// The accumulated sums are passed on to the sum-based ComputeCorrelation overload.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="entity_ratings">(ID, rating) pairs describing the first entity</param>
        /// <param name="j">the ID of the second entity</param>
        /// <returns>the correlation computed from the shared ratings</returns>
        public float ComputeCorrelation(IRatings ratings, EntityType entity_type, IList<Tuple<int, float>> entity_ratings, int j)
        {
            bool by_user = entity_type == EntityType.USER;
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            var shared_ids = new HashSet<int>(entity_ratings.Select(pair => pair.Item1));
            shared_ids.IntersectWith(by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes));

            // look-up table: shared ID -> rating of the first entity
            var first_entity_ratings = new Dictionary<int, float>();
            foreach (var pair in entity_ratings)
            {
                if (shared_ids.Contains(pair.Item1))
                {
                    first_entity_ratings.Add(pair.Item1, pair.Item2);
                }
            }

            int n = shared_ids.Count;
            if (n < 2)
            {
                return 0;
            }

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                float r1 = first_entity_ratings[shared_id];
                float r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            return ComputeCorrelation(i_sum, j_sum, ii_sum, jj_sum, ij_sum, n);
        }
Exemple #9
0
		/// <summary>Performs user-wise fold-in evaluation, but instead of folding in perform incremental training with the new data</summary>
		/// <remarks>
		/// For every user that occurs in both data sets, the recommender is cloned, the clone is trained
		/// incrementally with the user's ratings from update_data, and the clone's predictions for the
		/// user's items in eval_data are compared with the actual ratings.
		/// If no rating is evaluated, the averaged measures are NaN (division of 0.0 by 0).
		/// </remarks>
		/// <returns>the evaluation results (RMSE, MAE, NMAE, CBD)</returns>
		/// <param name='recommender'>the incremental rating predictor to evaluate; only clones of it are updated</param>
		/// <param name='update_data'>the rating data used to represent the users</param>
		/// <param name='eval_data'>the evaluation data</param>
		static public RatingPredictionEvaluationResults EvaluateFoldInIncrementalTraining(this IncrementalRatingPredictor recommender, IRatings update_data, IRatings eval_data)
		{
			double rmse = 0;
			double mae  = 0;
			double cbd  = 0;

			int rating_count = 0;
			foreach (int user_id in update_data.AllUsers)
				if (eval_data.AllUsers.Contains(user_id))
				{
					var local_recommender = (IncrementalRatingPredictor) recommender.Clone();

					// add ratings and perform incremental training
					var user_ratings = new RatingsProxy(update_data, update_data.ByUser[user_id]);
					local_recommender.AddRatings(user_ratings);

					var items_to_rate = (from index in eval_data.ByUser[user_id] select eval_data.Items[index]).ToArray();

					// predict with the incrementally trained clone; querying the original
					// recommender here would ignore the ratings that were just added
					var predicted_ratings = local_recommender.Recommend(user_id, candidate_items:items_to_rate);

					foreach (var pred in predicted_ratings)
					{
						float prediction = pred.Item2;
						float actual_rating = eval_data.Get(user_id, pred.Item1, eval_data.ByUser[user_id]);
						float error = prediction - actual_rating;

						rmse += error * error;
						mae  += Math.Abs(error);
						cbd  += Eval.Ratings.ComputeCBD(actual_rating, prediction, recommender.MinRating, recommender.MaxRating);
						rating_count++;
					}

					// remove ratings again
					local_recommender.RemoveRatings(user_ratings);

					// progress indicator: one dot per evaluated user
					Console.Error.Write(".");
				}

			// turn the accumulated error sums into averages
			mae  = mae / rating_count;
			rmse = Math.Sqrt(rmse / rating_count);
			cbd  = cbd / rating_count;

			var result = new RatingPredictionEvaluationResults();
			result["RMSE"] = (float) rmse;
			result["MAE"]  = (float) mae;
			// MAE normalized by the width of the rating scale
			result["NMAE"] = (float) mae / (recommender.MaxRating - recommender.MinRating);
			result["CBD"]  = (float) cbd;
			return result;
		}
Exemple #10
0
        /// <summary>Computes a shrunk cosine between two entities over rating deviations from the co-rated entities' averages</summary>
        /// <remarks>
        /// Each shared rating is centered by the average rating of the other-side entity it belongs to.
        /// Returns 1 if both IDs are equal, and 0 if fewer than two IDs are shared or the denominator is 0.
        /// The cosine is scaled by n / (n + shrinkage), where n is the number of shared IDs.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="i">the ID of first entity</param>
        /// <param name="j">the ID of second entity</param>
        /// <param name="shrinkage">the shrinkage parameter</param>
        /// <returns>the shrunk adjusted cosine value</returns>
        public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
        {
            if (i == j)
            {
                return 1;
            }

            bool by_user = entity_type == EntityType.USER;
            IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
            HashSet<int> j_ids      = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
            shared_ids.IntersectWith(j_ids);

            int n = shared_ids.Count;
            if (n < 2)
            {
                return 0;
            }

            // ratings grouped by the opposite entity type, used for the averages
            List<Ratings> ratings_by_other_entity = by_user ? ratings.ByItem : ratings.ByUser;

            double sum_ij = 0, sum_ii = 0, sum_jj = 0;
            foreach (int shared_id in shared_ids)
            {
                double average_rating = ratings_by_other_entity[shared_id].Average;

                double r1 = by_user
                    ? ratings.Get(i, shared_id, i_indexes)
                    : ratings.Get(shared_id, i, i_indexes);
                double r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                // deviations from the average of the co-rated entity
                double dev_i = r1 - average_rating;
                double dev_j = r2 - average_rating;

                sum_ij += dev_i * dev_j;
                sum_ii += dev_i * dev_i;
                sum_jj += dev_j * dev_j;
            }

            double denominator = Math.Sqrt(sum_ii * sum_jj);
            if (denominator == 0)
            {
                return 0;
            }

            double adjusted_cosine = sum_ij / denominator;
            return((float)adjusted_cosine * (n / (n + shrinkage)));
        }
Exemple #11
0
        /// <summary>Computes the Pearson correlation with shrinkage between two entities</summary>
        /// <remarks>
        /// Only IDs rated by both entities are used.
        /// Returns 1 if both IDs are equal, and 0 if fewer than two IDs are shared or the denominator is 0.
        /// The correlation is scaled by n / (n + shrinkage), where n is the number of shared IDs.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="i">the ID of first entity</param>
        /// <param name="j">the ID of second entity</param>
        /// <param name="shrinkage">the shrinkage parameter</param>
        /// <returns>the shrunk correlation value</returns>
        public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
        {
            if (i == j)
                return 1;

            bool by_user = entity_type == EntityType.USER;
            IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
            HashSet<int> j_ids      = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
            shared_ids.IntersectWith(j_ids);

            int n = shared_ids.Count;
            if (n < 2)
                return 0;

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                double r1 = by_user
                    ? ratings.Get(i, shared_id, i_indexes)
                    : ratings.Get(shared_id, i, i_indexes);
                double r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            double i_term = n * ii_sum - i_sum * i_sum;
            double j_term = n * jj_sum - j_sum * j_sum;
            double denominator = Math.Sqrt(i_term * j_term);
            if (denominator == 0)
                return 0;

            double pmcc = (n * ij_sum - i_sum * j_sum) / denominator;
            return (float) pmcc * (n / (n + shrinkage));
        }
Exemple #12
0
        /// <summary>Computes the Pearson correlation with shrinkage between two entities</summary>
        /// <remarks>
        /// Only IDs rated by both entities are used.
        /// Returns 1 if both IDs are equal, and 0 if fewer than two IDs are shared or the denominator is 0.
        /// The correlation is scaled by n / (n + shrinkage), where n is the number of shared IDs.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="i">the ID of first entity</param>
        /// <param name="j">the ID of second entity</param>
        /// <param name="shrinkage">the shrinkage parameter</param>
        /// <returns>the shrunk correlation value</returns>
        public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, int i, int j, float shrinkage)
        {
            if (i == j)
            {
                return 1;
            }

            bool by_user = entity_type == EntityType.USER;
            IList<int> i_indexes = by_user ? ratings.ByUser[i] : ratings.ByItem[i];
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            HashSet<int> shared_ids = by_user ? ratings.GetItems(i_indexes) : ratings.GetUsers(i_indexes);
            HashSet<int> j_ids      = by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes);
            shared_ids.IntersectWith(j_ids);

            int n = shared_ids.Count;
            if (n < 2)
            {
                return 0;
            }

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                double r1 = by_user
                    ? ratings.Get(i, shared_id, i_indexes)
                    : ratings.Get(shared_id, i, i_indexes);
                double r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            double i_term = n * ii_sum - i_sum * i_sum;
            double j_term = n * jj_sum - j_sum * j_sum;
            double denominator = Math.Sqrt(i_term * j_term);
            if (denominator == 0)
            {
                return 0;
            }

            double pmcc = (n * ij_sum - i_sum * j_sum) / denominator;
            return((float)pmcc * (n / (n + shrinkage)));
        }
Exemple #13
0
        /// <summary>Computes the Pearson correlation with shrinkage between an entity given by explicit ratings and a stored entity</summary>
        /// <remarks>
        /// Only IDs present in both rating sets are used.
        /// Returns 0 if fewer than two IDs are shared or the denominator is 0.
        /// The correlation is scaled by (n - 1) / (n - 1 + shrinkage), where n is the number of shared IDs.
        /// </remarks>
        /// <param name="ratings">the rating data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="entity_ratings">ratings identifying the first entity</param>
        /// <param name="j">the ID of second entity</param>
        /// <param name="shrinkage">the shrinkage parameter, set to 0 for the standard Pearson correlation without shrinkage</param>
        /// <returns>the shrunk correlation value</returns>
        public static float ComputeCorrelation(IRatings ratings, EntityType entity_type, IList<Pair<int, float>> entity_ratings, int j, float shrinkage)
        {
            bool by_user = entity_type == EntityType.USER;
            IList<int> j_indexes = by_user ? ratings.ByUser[j] : ratings.ByItem[j];

            // IDs rated by both entities
            var shared_ids = new HashSet<int>(entity_ratings.Select(pair => pair.First));
            shared_ids.IntersectWith(by_user ? ratings.GetItems(j_indexes) : ratings.GetUsers(j_indexes));

            // look-up table: shared ID -> rating of the first entity
            var first_entity_ratings = new Dictionary<int, float>();
            foreach (var pair in entity_ratings)
            {
                if (shared_ids.Contains(pair.First))
                {
                    first_entity_ratings.Add(pair.First, pair.Second);
                }
            }

            int n = shared_ids.Count;
            if (n < 2)
            {
                return 0;
            }

            // accumulate all required sums in a single pass
            double i_sum = 0, j_sum = 0;
            double ij_sum = 0, ii_sum = 0, jj_sum = 0;
            foreach (int shared_id in shared_ids)
            {
                float r1 = first_entity_ratings[shared_id];
                float r2 = by_user
                    ? ratings.Get(j, shared_id, j_indexes)
                    : ratings.Get(shared_id, j, j_indexes);

                i_sum  += r1;
                j_sum  += r2;
                ij_sum += r1 * r2;
                ii_sum += r1 * r1;
                jj_sum += r2 * r2;
            }

            double i_term = n * ii_sum - i_sum * i_sum;
            double j_term = n * jj_sum - j_sum * j_sum;
            double denominator = Math.Sqrt(i_term * j_term);
            if (denominator == 0)
            {
                return 0;
            }

            double pmcc = (n * ij_sum - i_sum * j_sum) / denominator;
            return((float)pmcc * ((n - 1) / (n - 1 + shrinkage)));
        }