Shrunk Pearson correlation for rating data

The correlation values are shrunk towards zero, depending on the number of ratings the estimate is based on. Otherwise, we would give too much weight to similarities estimated from just a few examples.

http://en.wikipedia.org/wiki/Pearson_correlation

We apply shrinkage as in formula (5.16) of chapter 5 of the Recommender Systems Handbook. Note that the shrinkage formula has changed between the two publications listed below. It is now based on the assumption that the true correlations are normally distributed; the shrunk estimate is the posterior mean of the empirical estimate.

Literature: (1) Yehuda Koren: Factor in the Neighbors: Scalable and Accurate Collaborative Filtering, Transactions on Knowledge Discovery from Data (TKDD), 2009. http://public.research.att.com/~volinsky/netflix/factorizedNeighborhood.pdf (2) Yehuda Koren, Robert Bell: Advances in Collaborative Filtering, Chapter 5 of the Recommender Systems Handbook, Springer, 2011. http://research.yahoo.net/files/korenBellChapterSpringer.pdf

Наследование: RatingCorrelationMatrix
Пример #1
0
        /// <summary>Build a Pearson correlation matrix over all users or all items of a rating dataset</summary>
        /// <param name="ratings">the ratings data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="shrinkage">a shrinkage parameter</param>
        /// <returns>the complete Pearson correlation matrix</returns>
        /// <exception cref="ArgumentException">thrown if entity_type is neither USER nor ITEM</exception>
        /// <exception cref="OverflowException">rethrown, after a message on standard error, if constructing the matrix overflows</exception>
        public static CorrelationMatrix Create(IRatings ratings, EntityType entity_type, float shrinkage)
        {
            // reject unsupported entity types before touching the rating data
            bool by_user = entity_type.Equals(EntityType.USER);
            if (!by_user && !entity_type.Equals(EntityType.ITEM))
                throw new ArgumentException("Unknown entity type: " + entity_type);

            // the matrix dimension is the largest ID of the chosen entity type, plus one
            int entity_count = by_user ? ratings.MaxUserID + 1 : ratings.MaxItemID + 1;

            Pearson matrix;
            try
            {
                matrix = new Pearson(entity_count);
            }
            catch (OverflowException)
            {
                // report the offending size, then let the caller handle the exception
                Console.Error.WriteLine("Too many entities: " + entity_count);
                throw;
            }

            matrix.shrinkage = shrinkage;
            matrix.ComputeCorrelations(ratings, entity_type);
            return matrix;
        }
Пример #2
0
		/// <summary>Check the correlation computed for users 0 and 1 with shrinkage 0</summary>
		[Test]
		public void TestComputeCorrelation()
		{
			// fixture: users 0 and 1 have exactly one rated item (4) in common
			int[]   user_ids = { 0, 0, 1, 1, 1 };
			int[]   item_ids = { 1, 4, 2, 3, 4 };
			float[] values   = { 0.3f, 0.2f, 0.6f, 0.4f, 0.2f };

			var rating_data = new Ratings();
			for (int i = 0; i < values.Length; i++)
				rating_data.Add(user_ids[i], item_ids[i], values[i]);

			// compute the single user-user correlation and compare against the expected value
			var pearson = new Pearson(rating_data.AllUsers.Count, 0f);
			var correlation = pearson.ComputeCorrelation(rating_data, EntityType.USER, 0, 1);

			Assert.AreEqual(0, correlation);
		}
Пример #3
0
		/// <summary>Check size, symmetry and one entry of a user-user correlation matrix</summary>
		[Test]
		public void TestCreate()
		{
			// fixture: three users (IDs 0..2) rating items 0..4
			int[]   user_ids = { 0, 0, 0, 1, 1, 2, 2 };
			int[]   item_ids = { 1, 2, 4, 3, 4, 0, 1 };
			float[] values   = { 0.3f, 0.6f, 0.2f, 0.4f, 0.2f, 0.1f, 0.3f };

			var rating_data = new Ratings();
			for (int i = 0; i < values.Length; i++)
				rating_data.Add(user_ids[i], item_ids[i], values[i]);

			// one row per user ID, shrinkage 0
			var matrix = new Pearson(rating_data.MaxUserID + 1, 0f);
			matrix.ComputeCorrelations(rating_data, EntityType.USER);

			Assert.AreEqual(3, matrix.NumberOfRows);
			Assert.IsTrue(matrix.IsSymmetric);
			Assert.AreEqual(0, matrix[0, 1]);
		}
Пример #4
0
		/// <summary>Check one entry of the user-user matrix filled by ComputeCorrelations</summary>
		[Test]
		public void TestComputeCorrelations()
		{
			var matrix = new Pearson(3, 0f);

			// fixture: three users (IDs 0..2) rating items 0..4
			int[]   user_ids = { 0, 0, 0, 1, 1, 2, 2 };
			int[]   item_ids = { 1, 2, 4, 3, 4, 0, 1 };
			float[] values   = { 0.3f, 0.6f, 0.2f, 0.4f, 0.2f, 0.1f, 0.3f };

			var ratings = new Ratings();
			for (int i = 0; i < values.Length; i++)
				ratings.Add(user_ids[i], item_ids[i], values[i]);

			// set the shrinkage through the property as well, then fill the matrix
			matrix.Shrinkage = 0;
			matrix.ComputeCorrelations(ratings, EntityType.USER);

			var entry = matrix[0, 2];
			Assert.AreEqual(0, entry);
		}
Пример #5
0
		/// <summary>Check one item-item correlation computed from a rating file on disk, with shrinkage 200</summary>
		[Test]
		public void TestComputeCorrelations2()
		{
			// the rating file is read from a path relative to the working directory
			const string data_file = "../../../../data/ml-100k/u1.base";

			var users = new Mapping();
			var items = new Mapping();
			var rating_data = RatingData.Read(data_file, users, items);

			var pearson = new Pearson(rating_data.AllUsers.Count, 200f);
			var correlation = pearson.ComputeCorrelation(rating_data, EntityType.ITEM, 45, 311);

			// compare against the reference value with an absolute tolerance
			Assert.AreEqual(-0.02788301f, correlation, 0.00001);
		}
Пример #6
0
        /// <summary>Construct and fill a Pearson correlation matrix for either the users or the items of a dataset</summary>
        /// <param name="ratings">the ratings data</param>
        /// <param name="entity_type">the entity type, either USER or ITEM</param>
        /// <param name="shrinkage">a shrinkage parameter</param>
        /// <returns>the complete Pearson correlation matrix</returns>
        /// <exception cref="ArgumentException">thrown if entity_type is neither USER nor ITEM</exception>
        /// <exception cref="OverflowException">rethrown, after a message on standard error, if constructing the matrix overflows</exception>
        public static CorrelationMatrix Create(IRatings ratings, EntityType entity_type, float shrinkage)
        {
            bool is_user = entity_type.Equals(EntityType.USER);
            bool is_item = !is_user && entity_type.Equals(EntityType.ITEM);
            if (!(is_user || is_item))
            {
                throw new ArgumentException("Unknown entity type: " + entity_type);
            }

            // size the matrix as (largest ID of the selected entity type) + 1
            int max_id = is_user ? ratings.MaxUserID : ratings.MaxItemID;
            int size = max_id + 1;

            Pearson result = null;
            try
            {
                result = new Pearson(size);
            }
            catch (OverflowException)
            {
                // print the requested size before propagating the exception unchanged
                Console.Error.WriteLine("Too many entities: " + size);
                throw;
            }

            result.shrinkage = shrinkage;
            result.ComputeCorrelations(ratings, entity_type);

            return result;
        }