예제 #1
0
		/// <summary>Compute weighted overlaps between the row vectors of a binary matrix</summary>
		/// <remarks>
		/// Every column of <paramref name="entity_data"/> gets the weight 1 / log2(3 + freq),
		/// where freq is the number of rows in which the column is set.
		/// Each pair of rows that shares a column accumulates the square of that column's weight.
		/// </remarks>
		/// <returns>
		/// a tuple: the symmetric matrix with the weighted overlaps,
		/// and for every row the sum of the weights of its columns
		/// </returns>
		/// <param name='entity_data'>the binary matrix</param>
		public static Tuple<IMatrix<float>, IList<float>> ComputeWeighted(IBooleanMatrix entity_data)
		{
			var columns = (IBooleanMatrix) entity_data.Transpose();
			int num_columns = columns.NumberOfRows;

			// first pass: one weight per column, decreasing with the column's frequency
			var column_weights = new float[num_columns];
			for (int c = 0; c < num_columns; c++)
			{
				int freq = columns.GetEntriesByRow(c).Count;
				column_weights[c] = 1f / (float) Math.Log(3 + freq, 2); // TODO make configurable
			}

			IMatrix<float> weighted_overlap = new SymmetricMatrix<float>(entity_data.NumberOfRows);
			IList<float> entity_weights = new float[entity_data.NumberOfRows];

			// second pass: distribute every column's weight over the rows (and row pairs) it occurs in
			for (int c = 0; c < num_columns; c++)
			{
				float weight = column_weights[c];
				var members = columns.GetEntriesByRow(c);

				for (int i = 0; i < members.Count; i++)
				{
					int x = members[i];
					entity_weights[x] += weight;

					// every unordered pair {x, y} of rows sharing this column is visited exactly once
					for (int j = i + 1; j < members.Count; j++)
					{
						int y = members[j];
						weighted_overlap[x, y] += weight * weight;
					}
				}
			}

			return new Tuple<IMatrix<float>, IList<float>>(weighted_overlap, entity_weights);
		}
        /// <summary>Compute frequency-weighted overlaps between the rows of a binary matrix</summary>
        /// <remarks>
        /// A column that is set in freq rows receives the weight 1 / log2(3 + freq).
        /// For each pair of rows, the squared weights of their shared columns are summed up.
        /// </remarks>
        /// <returns>the symmetric matrix of weighted overlaps, and the per-row sums of column weights</returns>
        /// <param name='entity_data'>the binary matrix</param>
        public static Tuple <IMatrix <float>, IList <float> > ComputeWeighted(IBooleanMatrix entity_data)
        {
            var by_column = (IBooleanMatrix)entity_data.Transpose();

            // pass 1: derive a weight for every column from its frequency
            var weight_of_column = new float[by_column.NumberOfRows];
            for (int col = 0; col < by_column.NumberOfRows; col++)
            {
                int freq = by_column.GetEntriesByRow(col).Count;
                weight_of_column[col] = 1f / (float)Math.Log(3 + freq, 2); // TODO make configurable
            }

            IMatrix <float> pair_weights = new SymmetricMatrix <float>(entity_data.NumberOfRows);
            IList <float>   row_weights  = new float[entity_data.NumberOfRows];

            // pass 2: go over all columns and credit the rows in which they occur
            for (int col = 0; col < by_column.NumberOfRows; col++)
            {
                var   members = by_column.GetEntriesByRow(col);
                float w       = weight_of_column[col];

                int first = 0;
                while (first < members.Count)
                {
                    int x = members[first];
                    row_weights[x] += w;

                    // pair x with every row listed after it for this column
                    for (int second = first + 1; second < members.Count; second++)
                    {
                        int y = members[second];
                        pair_weights[x, y] += w * w;
                    }
                    first++;
                }
            }

            return Tuple.Create(pair_weights, row_weights);
        }
예제 #3
0
        /// <summary>Build a textual summary of item recommendation datasets</summary>
        /// <remarks>
        /// For each dataset the number of users, items and events is reported, together with the
        /// sparsity, i.e. the percentage of cells of the user-item matrix that have no event.
        /// The attribute statistics are appended at the end.
        /// </remarks>
        /// <param name="training_data">the training dataset</param>
        /// <param name="test_data">the test dataset; skipped if null</param>
        /// <param name="user_attributes">the user attributes</param>
        /// <param name="item_attributes">the item attributes</param>
        /// <returns>the statistics, one line per dataset</returns>
        public static string Statistics(
            this IPosOnlyFeedback training_data, IPosOnlyFeedback test_data = null,
            IBooleanMatrix user_attributes = null, IBooleanMatrix item_attributes = null)
        {
            // training data stats
            int    train_users  = training_data.AllUsers.Count;
            int    train_items  = training_data.AllItems.Count;
            long   train_cells  = (long)train_users * train_items; // widen first to avoid int overflow
            long   train_empty  = train_cells - training_data.Count;
            double train_sparse = 100.0 * train_empty / train_cells;

            string report = string.Format(
                CultureInfo.InvariantCulture,
                "training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n",
                train_users, train_items, training_data.Count, train_sparse);

            // test data stats
            if (test_data != null)
            {
                int    test_users  = test_data.AllUsers.Count;
                int    test_items  = test_data.AllItems.Count;
                long   test_cells  = (long)test_users * test_items;
                long   test_empty  = test_cells - test_data.Count;
                double test_sparse = 100.0 * test_empty / test_cells; // TODO depends on the eval scheme whether this is correct

                report += string.Format(
                    CultureInfo.InvariantCulture,
                    "test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n",
                    test_users, test_items, test_data.Count, test_sparse);
            }

            string attribute_report = Statistics(user_attributes, item_attributes);
            return report + attribute_report;
        }
		/// <summary>Compute the correlations from overlap counts provided by Overlap.ComputeUShort</summary>
		/// <remarks>Only the entries below the diagonal (y &lt; x) are assigned by this method.</remarks>
		/// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
		void ComputeCorrelationsUShortOverlap(IBooleanMatrix entity_data)
		{
			var shared_counts = Overlap.ComputeUShort(entity_data);

			for (int x = 0; x < NumEntities; x++)
			{
				for (int y = 0; y < x; y++)
				{
					var shared = shared_counts[x, y];
					var size_x = entity_data.NumEntriesByRow(x);
					var size_y = entity_data.NumEntriesByRow(y);

					this[x, y] = ComputeCorrelationFromOverlap(shared, size_x, size_y);
				}
			}
		}
		/// <summary>Compute the correlations from the weighted overlaps provided by Overlap.ComputeWeighted</summary>
		/// <remarks>
		/// The entity weights returned alongside the overlaps are passed on
		/// in the argument positions that the unweighted variants use for the row sizes.
		/// Only the entries below the diagonal (y &lt; x) are assigned by this method.
		/// </remarks>
		/// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
		void ComputeCorrelationsWeighted(IBooleanMatrix entity_data)
		{
			var weighted = Overlap.ComputeWeighted(entity_data);
			var pair_weights = weighted.Item1;
			var row_weights  = weighted.Item2;

			for (int x = 0; x < NumEntities; x++)
			{
				for (int y = 0; y < x; y++)
				{
					var shared   = pair_weights[x, y];
					var weight_x = row_weights[x];
					var weight_y = row_weights[y];

					this[x, y] = ComputeCorrelationFromOverlap(shared, weight_x, weight_y);
				}
			}
		}
		/// <summary>Count the entries stored in this matrix that are also true in another matrix</summary>
		/// <param name="s">the matrix to compare to</param>
		/// <returns>the number of (row, column) entries in row_list for which <paramref name="s"/> is true as well</returns>
		public int Overlap(IBooleanMatrix s)
		{
			int shared = 0;

			for (int row_id = 0; row_id < row_list.Count; row_id++)
			{
				foreach (int column_id in row_list[row_id])
				{
					// entries missing in the other matrix do not count
					if (!s[row_id, column_id])
						continue;

					shared++;
				}
			}

			return shared;
		}
        /// <summary>Fill the correlation entries using ushort-valued overlap counts</summary>
        /// <remarks>Assigns the entries with y &lt; x only; the overlaps come from Overlap.ComputeUShort.</remarks>
        /// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
        void ComputeCorrelationsUShortOverlap(IBooleanMatrix entity_data)
        {
            var common = Overlap.ComputeUShort(entity_data);

            int x = 0;
            while (x < NumEntities)
            {
                for (int y = 0; y < x; y++)
                {
                    this[x, y] = ComputeCorrelationFromOverlap(
                        common[x, y],
                        entity_data.NumEntriesByRow(x),
                        entity_data.NumEntriesByRow(y));
                }
                x++;
            }
        }
        /// <summary>Initialize the model</summary>
        /// <remarks>
        /// Runs the base class initialization first. If no user relation has been set,
        /// an empty one is substituted and a warning is written to standard error.
        /// Finally, the group array is allocated with one element per entry of the relation.
        /// </remarks>
        protected internal override void InitModel()
        {
            base.InitModel();

            bool relation_missing = (user_connections == null);
            if (relation_missing)
            {
                // fall back to an empty relation so that the allocation below does not fail
                user_connections = new SparseBooleanMatrix();
                Console.Error.WriteLine("Warning: UserRelation not set.");
            }

            var num_connections = user_connections.NumberOfEntries;
            group = new float[num_connections];
        }
		/// <summary>Compute the correlations from overlap counts provided by Overlap.ComputeUInt</summary>
		/// <remarks>
		/// Both [x, y] and [y, x] are assigned; for the second one the row sizes are passed in swapped order.
		/// </remarks>
		/// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
		void ComputeCorrelationsUIntOverlap(IBooleanMatrix entity_data)
		{
			var shared_counts = Overlap.ComputeUInt(entity_data);

			// compute correlations
			for (int x = 0; x < num_entities; x++)
			{
				for (int y = 0; y < x; y++)
				{
					var shared = shared_counts[x, y];
					var size_x = entity_data.NumEntriesByRow(x);
					var size_y = entity_data.NumEntriesByRow(y);

					this[x, y] = ComputeCorrelationFromOverlap(shared, size_x, size_y);
					this[y, x] = ComputeCorrelationFromOverlap(shared, size_y, size_x);
				}
			}
		}
예제 #10
0
        /// <summary>Initialize the model</summary>
        /// <remarks>
        /// If no user relation has been set, an empty one is substituted and a warning is written
        /// to standard error. MaxUserID is then raised to cover the last row index and the last
        /// column index of the relation, before the base class initialization runs.
        /// </remarks>
        protected internal override void InitModel()
        {
            bool relation_missing = (user_connections == null);
            if (relation_missing)
            {
                user_connections = new SparseBooleanMatrix();
                Console.Error.WriteLine("Warning: UserRelation not set.");
            }

            // the relation may use IDs both as row and as column indices
            var last_row_id = user_connections.NumberOfRows - 1;
            this.MaxUserID = Math.Max(MaxUserID, last_row_id);

            var last_column_id = user_connections.NumberOfColumns - 1;
            this.MaxUserID = Math.Max(MaxUserID, last_column_id);

            base.InitModel();
        }
예제 #11
0
		/// <summary>Prepare the user relation and the maximum user ID, then initialize the base model</summary>
		/// <remarks>
		/// A missing user relation is replaced by an empty matrix (with a warning on standard error).
		/// MaxUserID is updated twice: once from the number of rows and once from the number of
		/// columns of the relation, so that it is at least as large as either of them minus one.
		/// </remarks>
		protected internal override void InitModel()
		{
			if (user_connections == null)
			{
				// keep going with an empty relation instead of failing later on
				user_connections = new SparseBooleanMatrix();
				Console.Error.WriteLine("Warning: UserRelation not set.");
			}

			var highest_row_index = user_connections.NumberOfRows - 1;
			this.MaxUserID = Math.Max(MaxUserID, highest_row_index);

			var highest_column_index = user_connections.NumberOfColumns - 1;
			this.MaxUserID = Math.Max(MaxUserID, highest_column_index);

			base.InitModel();
		}
        /// <summary>Fill the correlation entries using weighted overlaps</summary>
        /// <remarks>
        /// Overlap.ComputeWeighted delivers the pairwise weighted overlaps (Item1) and one weight
        /// per entity (Item2). Only the entries with y &lt; x are assigned here.
        /// </remarks>
        /// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
        void ComputeCorrelationsWeighted(IBooleanMatrix entity_data)
        {
            var result       = Overlap.ComputeWeighted(entity_data);
            var pair_overlap = result.Item1;
            var weights      = result.Item2;

            int x = 0;
            while (x < NumEntities)
            {
                for (int y = 0; y < x; y++)
                {
                    this[x, y] = ComputeCorrelationFromOverlap(
                        pair_overlap[x, y],
                        weights[x],
                        weights[y]);
                }
                x++;
            }
        }
예제 #13
0
		/// <summary>Optimize all rows of one factor matrix while the other one is kept fixed</summary>
		/// <remarks>
		/// The comments are in terms of computing the user factors;
		/// it works the same with users and items exchanged.
		/// </remarks>
		/// <param name="data">the binary data, one row per row of <paramref name="W"/></param>
		/// <param name="W">the factor matrix whose rows are optimized</param>
		/// <param name="H">the factor matrix that is only passed on to the helpers</param>
		protected virtual void Optimize(IBooleanMatrix data, Matrix<float> W, Matrix<float> H)
		{
			// (1) create HH once in O(f^2|Items|); it is shared by all per-row calls
			var shared_square = ComputeSquareMatrix(H);

			// (2) optimize all U, one per-row Optimize call per row of W, scheduled by Parallel.For
			var num_rows = W.dim1;
			Parallel.For(0, num_rows, row_id => { Optimize(row_id, data, W, H, shared_square); });
		}
        /// <summary>Fill the correlation entries using uint-valued overlap counts</summary>
        /// <remarks>
        /// For every pair with y &lt; x, both [x, y] and [y, x] are assigned;
        /// the second call receives the two row sizes in swapped order.
        /// </remarks>
        /// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
        void ComputeCorrelationsUIntOverlap(IBooleanMatrix entity_data)
        {
            var common = Overlap.ComputeUInt(entity_data);

            // compute correlations
            int x = 0;
            while (x < num_entities)
            {
                for (int y = 0; y < x; y++)
                {
                    this[x, y] = ComputeCorrelationFromOverlap(
                        common[x, y], entity_data.NumEntriesByRow(x), entity_data.NumEntriesByRow(y));
                    this[y, x] = ComputeCorrelationFromOverlap(
                        common[x, y], entity_data.NumEntriesByRow(y), entity_data.NumEntriesByRow(x));
                }
                x++;
            }
        }
예제 #15
0
        /// <summary>Display dataset statistics</summary>
        /// <remarks>
        /// Reports users, items, ratings and sparsity (percentage of user-item cells without a rating)
        /// for the training data and, if given, the test data. For datasets that implement
        /// ITimedRatings the rating period is added. All lines are formatted with the invariant culture.
        /// </remarks>
        /// <param name="train">the training data</param>
        /// <param name="test">the test data</param>
        /// <param name="user_attributes">the user attributes</param>
        /// <param name="item_attributes">the item attributes</param>
        /// <param name="display_overlap">if set true, display the user/item overlap between train and test</param>
        /// <returns>the statistics as multi-line text, followed by the attribute statistics</returns>
        public static string Statistics(
            this IRatings train, IRatings test = null,
            IBooleanMatrix user_attributes     = null, IBooleanMatrix item_attributes = null,
            bool display_overlap = false)
        {
            // training data stats
            int    num_users   = train.AllUsers.Count;
            int    num_items   = train.AllItems.Count;
            long   matrix_size = (long)num_users * num_items; // widen first to avoid int overflow
            long   empty_size  = matrix_size - train.Count;
            double sparsity    = (double)100L * empty_size / matrix_size;
            string s           = string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, train.Count, sparsity);

            // a single 'as' replaces the former 'is' check followed by a second cast
            var time_train = train as ITimedRatings;
            if (time_train != null)
            {
                s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", time_train.EarliestTime, time_train.LatestTime);
            }

            // test data stats
            if (test != null)
            {
                num_users   = test.AllUsers.Count;
                num_items   = test.AllItems.Count;
                matrix_size = (long)num_users * num_items;
                empty_size  = matrix_size - test.Count;                  // TODO depends on the eval scheme whether this is correct
                sparsity    = (double)100L * empty_size / matrix_size;
                s          += string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, test.Count, sparsity);

                var time_test = test as ITimedRatings;
                if (time_test != null)
                {
                    s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", time_test.EarliestTime, time_test.LatestTime);
                }
            }

            // count and display the overlap between train and test
            if (display_overlap && test != null)
            {
                int      num_new_users = 0;
                int      num_new_items = 0;
                TimeSpan seconds       = Wrap.MeasureTime(delegate() {
                    num_new_users = test.AllUsers.Except(train.AllUsers).Count();
                    num_new_items = test.AllItems.Except(train.AllItems).Count();
                });
                // use the invariant culture here as well, like every other line of this report
                s += string.Format(CultureInfo.InvariantCulture, "{0} new users, {1} new items ({2} seconds)\n", num_new_users, num_new_items, seconds);
            }

            return(s + Statistics(user_attributes, item_attributes));
        }
예제 #16
0
        /// <summary>Optimizes the rows of one factor matrix given the other factor matrix</summary>
        /// <remarks>
        /// Comments are in terms of computing the user factors;
        /// the method works the same with users and items exchanged.
        /// </remarks>
        /// <param name="data">the binary data, one row per row of <paramref name="W"/></param>
        /// <param name="W">the factor matrix whose rows are optimized</param>
        /// <param name="H">the factor matrix that is passed on unchanged to the helpers</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix <float> W, Matrix <float> H)
        {
            // (1) create HH in O(f^2|Items|) -- computed once, shared by all rows
            var square_of_H = ComputeSquareMatrix(H);

            // (2) optimize all U -- one call of the per-row overload for each row of W
            var row_count = W.dim1;
            Parallel.For(
                0,
                row_count,
                u => { Optimize(u, data, W, H, square_of_H); });
        }
		/// <summary>Compute the correlations between the rows of a binary matrix</summary>
		/// <remarks>
		/// Resizes this matrix to the number of rows of the data, sets the diagonal to 1,
		/// and then delegates to the weighted, uint-based, or ushort-based computation.
		/// </remarks>
		/// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
		public void ComputeCorrelations(IBooleanMatrix entity_data)
		{
			Resize(entity_data.NumberOfRows);

			// the diagonal of the correlation matrix
			for (int entity_id = 0; entity_id < NumEntities; entity_id++)
				this[entity_id, entity_id] = 1;

			if (Weighted)
			{
				ComputeCorrelationsWeighted(entity_data);
				return;
			}

			// if possible, save some memory: the ushort variant is used
			// unless there are more than ushort.MaxValue columns
			bool fits_into_ushort = entity_data.NumberOfColumns <= ushort.MaxValue;
			if (fits_into_ushort)
				ComputeCorrelationsUShortOverlap(entity_data);
			else
				ComputeCorrelationsUIntOverlap(entity_data);
		}
예제 #18
0
		/// <summary>Display dataset statistics</summary>
		/// <remarks>
		/// For the training data and (if present) the test data, the number of users, items and ratings
		/// is reported together with the sparsity, i.e. the percentage of user-item cells without a rating.
		/// Datasets implementing ITimedRatings additionally get a line with their rating period.
		/// Every line is formatted with the invariant culture.
		/// </remarks>
		/// <param name="train">the training data</param>
		/// <param name="test">the test data</param>
		/// <param name="user_attributes">the user attributes</param>
		/// <param name="item_attributes">the item attributes</param>
		/// <param name="display_overlap">if set true, display the user/item overlap between train and test</param>
		/// <returns>the statistics as multi-line text, followed by the attribute statistics</returns>
		public static string Statistics(
			this IRatings train, IRatings test = null,
			IBooleanMatrix user_attributes = null, IBooleanMatrix item_attributes = null,
			bool display_overlap = false)
		{
			// training data stats
			int num_users = train.AllUsers.Count;
			int num_items = train.AllItems.Count;
			long matrix_size = (long) num_users * num_items; // long arithmetic, the product may exceed int.MaxValue
			long empty_size  = matrix_size - train.Count;
			double sparsity = (double) 100L * empty_size / matrix_size;
			string s = string.Format(CultureInfo.InvariantCulture, "training data: {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, train.Count, sparsity);

			// test the type only once: 'as' yields null for datasets without time information
			var timed_train = train as ITimedRatings;
			if (timed_train != null)
				s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", timed_train.EarliestTime, timed_train.LatestTime);

			// test data stats
			if (test != null)
			{
				num_users = test.AllUsers.Count;
				num_items = test.AllItems.Count;
				matrix_size = (long) num_users * num_items;
				empty_size  = matrix_size - test.Count; // TODO depends on the eval scheme whether this is correct
				sparsity = (double) 100L * empty_size / matrix_size;
				s += string.Format(CultureInfo.InvariantCulture, "test data:     {0} users, {1} items, {2} ratings, sparsity {3,0:0.#####}\n", num_users, num_items, test.Count, sparsity);

				var timed_test = test as ITimedRatings;
				if (timed_test != null)
					s += string.Format(CultureInfo.InvariantCulture, "rating period: {0} to {1}\n", timed_test.EarliestTime, timed_test.LatestTime);
			}

			// count and display the overlap between train and test
			if (display_overlap && test != null)
			{
				int num_new_users = 0;
				int num_new_items = 0;
				TimeSpan seconds = Wrap.MeasureTime(delegate() {
					num_new_users = test.AllUsers.Except(train.AllUsers).Count();
					num_new_items = test.AllItems.Except(train.AllItems).Count();
				});
				// formatted with the invariant culture, consistent with the rest of the report
				s += string.Format(CultureInfo.InvariantCulture, "{0} new users, {1} new items ({2} seconds)\n", num_new_users, num_new_items, seconds);
			}

			return s + Statistics(user_attributes, item_attributes);
		}
예제 #19
0
 /// <summary>Creates a weighted binary Cosine similarity matrix from given data</summary>
 /// <param name="vectors">the boolean data</param>
 /// <returns>the similarity matrix based on the data</returns>
 /// <exception cref="OverflowException">
 /// rethrown if the construction of the matrix raises it; the number of entities is reported on standard error first
 /// </exception>
 public static CorrelationMatrix Create(IBooleanMatrix vectors)
 {
     int entity_count = vectors.NumberOfRows;

     BinaryDataCorrelationMatrix similarities;
     try
     {
         similarities = new WeightedBinaryCosine(entity_count);
     }
     catch (OverflowException)
     {
         // add some context for the user, but leave the handling to the caller
         Console.Error.WriteLine("Too many entities: " + entity_count);
         throw;
     }

     similarities.ComputeCorrelations(vectors);
     return similarities;
 }
예제 #20
0
        /// <summary>Compute the cosine similarities between the rows of a binary matrix</summary>
        /// <remarks>
        /// The overlap (number of shared columns) of every pair of rows is counted via the transposed
        /// matrix and stored in the upper triangle of a sparse matrix. The similarity of a pair is
        /// overlap / sqrt(|row x| * |row y|); only pairs with a non-empty overlap are assigned,
        /// and the diagonal is set to 1.
        /// </remarks>
        /// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
        public override void ComputeCorrelations(IBooleanMatrix entity_data)
        {
            // cast once here instead of once per iteration of the loop below
            var transpose = (IBooleanMatrix)entity_data.Transpose();

            var overlap = new SparseMatrix <int>(entity_data.NumberOfRows, entity_data.NumberOfRows);

            // go over all (other) entities
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                var row = transpose.GetEntriesByRow(row_id);

                for (int i = 0; i < row.Count; i++)
                {
                    int x = row[i];

                    for (int j = i + 1; j < row.Count; j++)
                    {
                        int y = row[j];

                        // always count in the upper triangle, whatever the order of x and y
                        if (x < y)
                        {
                            overlap[x, y]++;
                        }
                        else
                        {
                            overlap[y, x]++;
                        }
                    }
                }
            }

            // the diagonal of the correlation matrix
            for (int i = 0; i < num_entities; i++)
            {
                this[i, i] = 1;
            }

            // compute cosine
            foreach (var index_pair in overlap.NonEmptyEntryIDs)
            {
                int x = index_pair.First;
                int y = index_pair.Second;

                // multiply in double precision: the product of two row sizes may exceed int.MaxValue,
                // which would turn the argument of Math.Sqrt into a wrong (possibly negative) number
                double size_product = (double)entity_data.NumEntriesByRow(x) * entity_data.NumEntriesByRow(y);

                this[x, y] = (float)(overlap[x, y] / Math.Sqrt(size_product));
            }
        }
예제 #21
0
        /// <summary>Get the overlap of two matrices, i.e. the number of true entries where they agree</summary>
        /// <remarks>Only the entries stored in this matrix are visited; each one is looked up in <paramref name="s"/>.</remarks>
        /// <param name="s">the matrix to compare to</param>
        /// <returns>the number of entries that are true in both matrices</returns>
        public int Overlap(IBooleanMatrix s)
        {
            int matches = 0;

            int row_id = 0;
            while (row_id < row_list.Count)
            {
                foreach (int column_id in row_list[row_id])
                {
                    matches += s[row_id, column_id] ? 1 : 0;
                }
                row_id++;
            }

            return matches;
        }
예제 #22
0
        /// <summary>Creates a Cosine similarity matrix from given data</summary>
        /// <param name="vectors">the boolean data</param>
        /// <returns>the similarity matrix based on the data</returns>
        /// <exception cref="OverflowException">
        /// rethrown if the construction of the matrix raises it; the number of entities is reported on standard error first
        /// </exception>
        public static CorrelationMatrix Create(IBooleanMatrix vectors)
        {
            int num_entities = vectors.NumberOfRows;

            BinaryDataCorrelationMatrix cm;
            try
            {
                cm = new BinaryCosine(num_entities);
            }
            catch (OverflowException)
            {
                // give the user a hint about the cause, then let the caller deal with the exception
                Console.Error.WriteLine("Too many entities: " + num_entities);
                throw;
            }

            cm.ComputeCorrelations(vectors);
            return cm;
        }
예제 #23
0
		/// <summary>Compute the overlap between the vectors in a binary matrix</summary>
		/// <remarks>
		/// The overlap of two rows is the number of columns that are set in both of them.
		/// It is counted by going over the rows of the transposed matrix
		/// and incrementing the counter of every pair of entries found there.
		/// </remarks>
		/// <returns>a sparse matrix with the overlaps</returns>
		/// <param name='entity_data'>the binary matrix</param>
		/// <exception cref="InvalidCastException">if the transpose is not an IBooleanMatrix</exception>
		public static IMatrix<uint> ComputeUInt(IBooleanMatrix entity_data)
		{
			// direct cast (as in ComputeWeighted): an unexpected type fails right here
			// instead of causing a NullReferenceException further down
			var transpose = (IBooleanMatrix) entity_data.Transpose();

			var overlap = new SymmetricSparseMatrix<uint>(entity_data.NumberOfRows);

			// go over all (other) entities
			for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
			{
				var row = transpose.GetEntriesByRow(row_id);
				for (int i = 0; i < row.Count; i++)
				{
					int x = row[i];
					// every unordered pair of entries of this row is counted exactly once
					for (int j = i + 1; j < row.Count; j++)
						overlap[x, row[j]]++;
				}
			}
			return overlap;
		}
예제 #24
0
    /// <summary>Resolve the data file names and load attribute and relation data</summary>
    /// <remarks>
    /// The training and test file names are only combined with data_dir here.
    /// Attribute files are read if their name is set; the attributes are handed to the recommender
    /// if it implements the corresponding attribute-aware interface. Relation files are read
    /// whenever the recommender implements the corresponding relation-aware interface.
    /// </remarks>
    protected virtual void LoadData()
    {
        training_file = Path.Combine(data_dir, training_file);
        if (test_file != null)
        {
            test_file = Path.Combine(data_dir, test_file);
        }

        // user attributes
        if (user_attributes_file != null)
        {
            string user_attributes_path = Path.Combine(data_dir, user_attributes_file);
            user_attributes = AttributeData.Read(user_attributes_path, user_mapping);
        }
        var user_attribute_aware = recommender as IUserAttributeAwareRecommender;
        if (user_attribute_aware != null)
        {
            user_attribute_aware.UserAttributes = user_attributes;
        }

        // item attributes
        if (item_attributes_file != null)
        {
            string item_attributes_path = Path.Combine(data_dir, item_attributes_file);
            item_attributes = AttributeData.Read(item_attributes_path, item_mapping);
        }
        var item_attribute_aware = recommender as IItemAttributeAwareRecommender;
        if (item_attribute_aware != null)
        {
            item_attribute_aware.ItemAttributes = item_attributes;
        }

        // user relation
        var user_relation_aware = recommender as IUserRelationAwareRecommender;
        if (user_relation_aware != null)
        {
            user_relation_aware.UserRelation = RelationData.Read(Path.Combine(data_dir, user_relations_file), user_mapping);
            Console.WriteLine("relation over {0} users", user_relation_aware.NumUsers);
        }

        // item relation
        var item_relation_aware = recommender as IItemRelationAwareRecommender;
        if (item_relation_aware != null)
        {
            item_relation_aware.ItemRelation = RelationData.Read(Path.Combine(data_dir, item_relations_file), item_mapping);
            Console.WriteLine("relation over {0} items", item_relation_aware.NumItems);
        }
    }
예제 #25
0
        /// <summary>Display statistics for user and item attributes</summary>
        /// <remarks>
        /// One line is produced per attribute matrix that is not null;
        /// if both are null, the result is the empty string.
        /// </remarks>
        /// <param name="user_attributes">the user attributes</param>
        /// <param name="item_attributes">the item attributes</param>
        /// <returns>the attribute statistics, the user line before the item line</returns>
        public static string Statistics(IBooleanMatrix user_attributes, IBooleanMatrix item_attributes)
        {
            string user_line = string.Empty;
            if (user_attributes != null)
            {
                user_line = string.Format(
                    "{0} user attributes for {1} users, {2} assignments, {3} users with attribute assignments\n",
                    user_attributes.NumberOfColumns, user_attributes.NumberOfRows,
                    user_attributes.NumberOfEntries, user_attributes.NonEmptyRowIDs.Count);
            }

            string item_line = string.Empty;
            if (item_attributes != null)
            {
                // NOTE(review): the attribute count is taken from NonEmptyColumnIDs.Count here,
                // but from NumberOfColumns for the users above -- confirm that this is intended
                item_line = string.Format(
                    "{0} item attributes for {1} items, {2} assignments, {3} items with attribute assignments\n",
                    item_attributes.NonEmptyColumnIDs.Count, item_attributes.NumberOfRows,
                    item_attributes.NumberOfEntries, item_attributes.NonEmptyRowIDs.Count);
            }

            return user_line + item_line;
        }
        /// <summary>Computes the overlap between the vectors in a binary matrix</summary>
        /// <remarks>
        /// The overlap of two rows is the number of columns that are set in both of them.
        /// NOTE(review): the counters are of type ushort, so the caller has to make sure that
        /// no pair of rows shares more than ushort.MaxValue columns.
        /// </remarks>
        /// <returns>a sparse matrix with the overlaps</returns>
        /// <param name='entity_data'>the binary matrix</param>
        /// <exception cref="InvalidCastException">if the transpose is not an IBooleanMatrix</exception>
        public static IMatrix <ushort> ComputeUShort(IBooleanMatrix entity_data)
        {
            // direct cast (as in ComputeWeighted): an unexpected type fails right here
            // instead of causing a NullReferenceException further down
            var transpose = (IBooleanMatrix)entity_data.Transpose();

            var overlap = new SymmetricSparseMatrix <ushort>(entity_data.NumberOfRows);

            // go over all (other) entities
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                var row = transpose.GetEntriesByRow(row_id);
                for (int i = 0; i < row.Count; i++)
                {
                    int x = row[i];
                    // every unordered pair of entries of this row is counted exactly once
                    for (int j = i + 1; j < row.Count; j++)
                    {
                        overlap[x, row[j]]++;
                    }
                }
            }
            return overlap;
        }
        /// <summary>Compute the correlations between the rows of a binary matrix</summary>
        /// <remarks>
        /// The matrix is resized to the number of rows of the data and its diagonal is set to 1.
        /// The remaining work is done by the weighted variant if Weighted is set, otherwise by the
        /// uint or the ushort variant, depending on the number of columns of the data.
        /// </remarks>
        /// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
        public void ComputeCorrelations(IBooleanMatrix entity_data)
        {
            Resize(entity_data.NumberOfRows);

            // the diagonal of the correlation matrix
            int diagonal_index = 0;
            while (diagonal_index < NumEntities)
            {
                this[diagonal_index, diagonal_index] = 1;
                diagonal_index++;
            }

            if (Weighted)
            {
                ComputeCorrelationsWeighted(entity_data);
                return;
            }

            // if possible, save some memory
            bool needs_uint_counters = entity_data.NumberOfColumns > ushort.MaxValue;
            if (!needs_uint_counters)
            {
                ComputeCorrelationsUShortOverlap(entity_data);
            }
            else
            {
                ComputeCorrelationsUIntOverlap(entity_data);
            }
        }
예제 #28
0
        /// <summary>Compute the Jaccard index between the rows of a binary matrix</summary>
        /// <remarks>
        /// The Jaccard index of two rows is |intersection| / |union|, where the size of the union is
        /// computed as |row x| + |row y| - |intersection|. The entries below the diagonal (y &lt; x)
        /// are assigned, and the diagonal is set to 1. A pair of rows without any entries gets 0.
        /// </remarks>
        /// <param name="entity_data">the binary matrix; its rows are the entities to correlate</param>
        /// <exception cref="InvalidCastException">if the transpose is not an IBooleanMatrix</exception>
        public override void ComputeCorrelations(IBooleanMatrix entity_data)
        {
            // direct cast: an unexpected type fails here instead of causing a NullReferenceException below
            var transpose = (IBooleanMatrix) entity_data.Transpose();

            var overlap = new SymmetricMatrix<int>(entity_data.NumberOfRows);

            // go over all (other) entities
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                var row = transpose.GetEntriesByRow(row_id);
                for (int i = 0; i < row.Count; i++)
                {
                    int x = row[i];
                    for (int j = i + 1; j < row.Count; j++)
                    {
                        int y = row[j];
                        overlap[x, y]++;
                    }
                }
            }

            // the diagonal of the correlation matrix
            for (int i = 0; i < num_entities; i++)
                this[i, i] = 1;

            // compute Jaccard index
            for (int x = 0; x < num_entities; x++)
                for (int y = 0; y < x; y++)
                {
                    int intersection_size = overlap[x, y];
                    int union_size = entity_data.NumEntriesByRow(x) + entity_data.NumEntriesByRow(y) - intersection_size;

                    // divide as float: integer division would truncate every index below 1 to 0,
                    // and it would throw a DivideByZeroException for two rows without entries
                    this[x, y] = union_size > 0 ? (float) intersection_size / union_size : 0f;
                }
        }
예제 #29
0
        /// <summary>Initialize the model, including the group array</summary>
        /// <remarks>
        /// The base class is initialized first. A missing user relation is replaced by an empty
        /// matrix, with a warning on standard error. The group array gets as many elements
        /// as the user relation has entries.
        /// </remarks>
        protected internal override void InitModel()
        {
            base.InitModel();

            if (user_connections == null)
            {
                // continue with an empty relation rather than failing on the allocation below
                user_connections = new SparseBooleanMatrix();
                Console.Error.WriteLine("Warning: UserRelation not set.");
            }

            var relation_size = user_connections.NumberOfEntries;
            group = new float[relation_size];
        }
 /// <summary>Compute the correlations from an implicit feedback, positive-only dataset</summary>
 /// <remarks>
 /// This implementation always throws; the method is virtual, so derived classes
 /// can replace it with an actual computation.
 /// </remarks>
 /// <param name="entity_data">the implicit feedback set, rows contain the entities to correlate</param>
 /// <exception cref="NotSupportedException">always thrown by this implementation</exception>
 public virtual void ComputeCorrelations(IBooleanMatrix entity_data)
 {
     throw new NotSupportedException();
 }
예제 #31
0
        /// <summary>Optimizes the rows of one factor matrix while the other factor matrix is kept fixed</summary>
        /// <remarks>
        /// Comments are in terms of computing the user factors; it works the same with users and
        /// items exchanged. For every row u of <paramref name="W"/>, a num_factors x num_factors
        /// linear system is set up from <paramref name="H"/> and the entries of row u of
        /// <paramref name="data"/>, solved via matrix inversion, and the solution is written to W[u, .].
        /// The rows are processed by Parallel.For; each iteration writes only to its own row of W.
        /// </remarks>
        /// <param name="data">the binary data, one row per row of <paramref name="W"/></param>
        /// <param name="W">the factor matrix whose rows are overwritten</param>
        /// <param name="H">the fixed factor matrix, which is only read</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix <float> W, Matrix <float> H)
        {
            // (1) create HH in O(f^2|Items|)
            // HH is symmetric; it is the same for all rows of W
            var gram = new Matrix <double>(num_factors, num_factors);

            for (int a = 0; a < num_factors; a++)
            {
                for (int b = 0; b < num_factors; b++)
                {
                    double sum = 0;
                    for (int i = 0; i < H.dim1; i++)
                    {
                        sum += H[i, a] * H[i, b];
                    }
                    gram[a, b] = sum;
                }
            }

            // (2) optimize all U
            Parallel.For(0, W.dim1, u =>
            {
                var entries = data.GetEntriesByRow(u);

                // create m = HH + HC_minus_IH + reg*I
                // HC_minus_IH is computed on the fly in O(f^2|S_u|) from the entries of row u
                // m is symmetric
                var system = new DenseMatrix(num_factors, num_factors);
                for (int a = 0; a < num_factors; a++)
                {
                    for (int b = 0; b < num_factors; b++)
                    {
                        double weighted_sum = 0;
                        foreach (int i in entries)
                        {
                            weighted_sum += H[i, a] * H[i, b] * alpha;
                        }

                        double cell = gram[a, b] + weighted_sum;
                        if (a == b)
                        {
                            cell += regularization; // regularization goes to the diagonal only
                        }
                        system[a, b] = cell;
                    }
                }

                // create HCp in O(f|S_u|)
                var rhs = new double[num_factors];
                for (int f = 0; f < num_factors; f++)
                {
                    double sum = 0;
                    foreach (int i in entries)
                    {
                        sum += H[i, f] * (1 + alpha);
                    }
                    rhs[f] = sum;
                }

                // the inverse of the symmetric matrix m is symmetric, too
                var system_inverse = system.Inverse();

                // write back optimal W: row u becomes m_inv * HCp
                for (int f = 0; f < num_factors; f++)
                {
                    double value = 0;
                    for (int k = 0; k < num_factors; k++)
                    {
                        value += system_inverse[f, k] * rhs[k];
                    }
                    W[u, f] = (float)value;
                }
            });
        }
예제 #32
0
        /// <summary>Optimizes the specified data</summary>
        /// <remarks>
        /// For every row u of W, solves (HH + HC_minus_IH + regularization * I) * w = HCp with H held fixed
        /// and writes the solution to W[u, .]. HH, HC_minus_IH and m are symmetric, so only the
        /// upper triangle of each is computed; the lower triangle is filled by mirroring.
        /// </remarks>
        /// <param name="data">boolean matrix whose row u lists the rows of H that are observed for row u of W</param>
        /// <param name="W">factor matrix to recompute; every row is overwritten</param>
        /// <param name="H">factor matrix that is held fixed; it is only read</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix <double> W, Matrix <double> H)
        {
            var HH          = new Matrix <double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix <double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);
            // TODO speed up using more parts of that library

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric: compute the upper triangle and mirror it
            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = f_1; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                    {
                        d += H[i, f_1] * H[i, f_2];
                    }
                    HH[f_1, f_2] = d;
                    HH[f_2, f_1] = d;
                }
            }
            // (2) optimize all U
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);
                // create HC_minus_IH in O(f^2|S_u|)
                // HC_minus_IH is symmetric: compute the upper triangle and mirror it
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                        {
                            d += H[i, f_1] * H[i, f_2] * c_pos;
                        }
                        HC_minus_IH[f_1, f_2] = d;
                        HC_minus_IH[f_2, f_1] = d;
                    }
                }
                // create HCp in O(f|S_u|)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    foreach (int i in row)
                    {
                        d += H[i, f] * (1 + c_pos);
                    }
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric; every entry is rewritten for each u, so the matrix can be reused
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                        {
                            d += regularization;
                        }
                        m[f_1, f_2] = d;
                        m[f_2, f_1] = d;
                    }
                }
                // the inverse m_inv is symmetric
                MathNet.Numerics.LinearAlgebra.Matrix m_inv = m.Inverse();
                // write back optimal W: W[u, .] = m_inv * HCp
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        d += m_inv[f, f_2] * HCp[f_2];
                    }
                    W[u, f] = d;
                }
            }
        }
예제 #33
0
        /// <summary>Optimizes the specified data</summary>
        /// <remarks>
        /// For every row u of W, solves (HH + HC_minus_IH + Regularization * I) * w = HCp with H held fixed.
        /// Unlike the plain variant, HCp also receives a contribution from every row of inverse_data that is
        /// NOT in row u of data, weighted by that row's entry count times a per-u normalization factor.
        /// </remarks>
        /// <param name="data">boolean matrix whose row u lists the rows of H that are observed for row u of W</param>
        /// <param name="inverse_data">boolean matrix whose per-row entry counts are used as weights; presumably the transpose of data (TODO confirm)</param>
        /// <param name="W">factor matrix to recompute; every row is overwritten</param>
        /// <param name="H">factor matrix that is held fixed; it is only read</param>
        void Optimize(IBooleanMatrix data, IBooleanMatrix inverse_data, Matrix<double> W, Matrix<double> H)
        {
            var HH          = new Matrix<double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);
            MathNet.Numerics.LinearAlgebra.Matrix m_inv;
            // TODO speed up using more parts of that library

            // TODO using properties gives a 3-5% performance penalty

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // the entry counts of inverse_data depend neither on u nor on f: read them once
            int num_inverse_rows = inverse_data.NumberOfRows;
            var inverse_row_sizes = new int[num_inverse_rows];
            for (int i = 0; i < num_inverse_rows; i++)
                inverse_row_sizes[i] = inverse_data.NumEntriesByRow(i);
            // membership of each index in the current row; refilled completely for every u
            var is_in_row = new bool[num_inverse_rows];

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric
            for (int f_1 = 0; f_1 < num_factors; f_1++)
                for (int f_2 = 0; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                        d += H[i, f_1] * H[i, f_2];
                    HH[f_1, f_2] = d;
                }
            // (2) optimize all U
            // HC_minus_IH is symmetric
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);

                // prepare KDD Cup specific weighting
                int num_user_items = row.Count;
                int user_positive_weight_sum = 0;
                foreach (int i in row)
                    user_positive_weight_sum += inverse_data.NumEntriesByRow(i);
                // NOTE(review): the divisor is 0 when Feedback.Count equals user_positive_weight_sum -- confirm this cannot happen
                double neg_weight_normalization = (double) (num_user_items * (1 + CPos)) / (Feedback.Count - user_positive_weight_sum);
                // TODO precompute
                // TODO check whether this is correct

                // the membership test does not depend on f: evaluate it once per index instead of once per (index, factor)
                for (int i = 0; i < num_inverse_rows; i++)
                    is_in_row[i] = row.Contains(i);

                // create HC_minus_IH in O(f^2|S_u|)
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                            d += H[i, f_1] * H[i, f_2] * CPos;
                        HC_minus_IH[f_1, f_2] = d;
                    }
                // create HCp in O(f|Items|)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int i = 0; i < num_inverse_rows; i++)
                        if (is_in_row[i])
                            d += H[i, f] * (1 + CPos);
                        else
                            d += H[i, f] * inverse_row_sizes[i] * neg_weight_normalization;
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric
                // the inverse m_inv is symmetric
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                            d += Regularization;
                        m[f_1, f_2] = d;
                    }
                m_inv = m.Inverse();
                // write back optimal W: W[u, .] = m_inv * HCp
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                        d += m_inv[f, f_2] * HCp[f_2];
                    W[u, f] = d;
                }
            }
        }
 /// <summary>Compute the correlations from an implicit feedback, positive-only dataset</summary>
 /// <remarks>This implementation does no work and always throws; being virtual, it can be overridden.</remarks>
 /// <param name="entity_data">the implicit feedback set, rows contain the entities to correlate</param>
 /// <exception cref="NotSupportedException">always thrown by this implementation</exception>
 public virtual void ComputeCorrelations(IBooleanMatrix entity_data)
 {
     throw new NotSupportedException();
 }
 /// <summary>Compute the correlations from an implicit feedback, positive-only dataset</summary>
 /// <remarks>Abstract: there is no default implementation, concrete subclasses have to provide one.</remarks>
 /// <param name="entity_data">the implicit feedback set, rows contain the entities to correlate</param>
 public abstract void ComputeCorrelations(IBooleanMatrix entity_data);
예제 #36
0
 ///
 public override void ComputeCorrelations(IBooleanMatrix entity_data)
 {
     // save some memory where possible: the ushort variant is used
     // unless the number of columns exceeds ushort.MaxValue
     bool fits_into_ushort = entity_data.NumberOfColumns <= ushort.MaxValue;

     if (fits_into_ushort)
         ComputeCorrelationsUShortOverlap(entity_data);
     else
         ComputeCorrelationsUIntOverlap(entity_data);
 }
예제 #37
0
        /// <summary>Recomputes row u of W with H held fixed</summary>
        /// <remarks>
        /// Solves (HH + Alpha * H_u^T H_u + Regularization * I) * w = HCp, where H_u consists of the rows of H
        /// listed in row u of data, and stores the solution (cast to float) in W[u, .].
        /// </remarks>
        /// <param name="u">index of the row of W to recompute</param>
        /// <param name="data">boolean matrix whose row u lists the rows of H that are observed for u</param>
        /// <param name="W">factor matrix; only row u is written</param>
        /// <param name="H">factor matrix that is held fixed; it is only read</param>
        /// <param name="HH">matrix added to the per-row term; presumably the precomputed H^T H (TODO confirm against caller)</param>
        private void Optimize(int u, IBooleanMatrix data, Matrix <float> W, Matrix <float> H, Matrix <double> HH)
        {
            var observed = data.GetEntriesByRow(u);

            // right-hand side HCp in O(f|S_u|): column sums over the observed rows, scaled by (1 + Alpha)
            var rhs = new double[num_factors];

            for (int f = 0; f < num_factors; f++)
            {
                double column_sum = 0;
                foreach (int i in observed)
                {
                    column_sum += H[i, f];
                }
                rhs[f] = column_sum * (1 + Alpha);
            }

            // system matrix = HH + HC_minus_IH + reg*I in O(f^2|S_u|)
            // it is symmetric, so the upper triangle is computed and mirrored;
            // the HC_minus_IH term (Alpha * dot product over the observed rows) is folded in directly
            var system = new DenseMatrix(num_factors, num_factors);

            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = f_1; f_2 < num_factors; f_2++)
                {
                    double dot = 0;
                    foreach (int i in observed)
                    {
                        dot += H[i, f_1] * H[i, f_2];
                    }

                    double hc_minus_ih = dot * Alpha;
                    double entry = HH[f_1, f_2] + hc_minus_ih;
                    if (f_1 == f_2)
                    {
                        entry += Regularization;
                    }
                    system[f_1, f_2] = entry;
                    system[f_2, f_1] = entry;
                }
            }
            var system_inv = system.Inverse();

            // write back optimal W: W[u, .] = system_inv * rhs
            for (int f = 0; f < num_factors; f++)
            {
                double solution = 0;
                for (int k = 0; k < num_factors; k++)
                {
                    solution += system_inv[f, k] * rhs[k];
                }
                W[u, f] = (float)solution;
            }
        }
예제 #38
0
        /// <summary>Computes weighted cosine-style correlations between the rows of a binary matrix</summary>
        /// <remarks>
        /// Every column of entity_data gets the weight 1 / log2(3 + frequency), where frequency is the number of
        /// rows that contain the column. For two rows x and y the squared weights of their shared columns are
        /// summed up and divided by sqrt(weight(x) * weight(y)), where weight(.) is the sum of the
        /// column weights of a row. Pairs in which one row has no entries (weight 0) are left untouched
        /// instead of being assigned NaN.
        /// </remarks>
        /// <param name="entity_data">the binary matrix, rows contain the entities to correlate</param>
        public override void ComputeCorrelations(IBooleanMatrix entity_data)
        {
            var transpose = (IBooleanMatrix) entity_data.Transpose();

            var other_entity_weights = new float[transpose.NumberOfRows];
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                int freq = transpose.GetEntriesByRow(row_id).Count;
                other_entity_weights[row_id] = 1f / (float) Math.Log(3 + freq, 2); // TODO make configurable
            }

            var weighted_overlap = new SymmetricMatrix<float>(entity_data.NumberOfRows);
            var entity_weights = new float[entity_data.NumberOfRows];

            // go over all (other) entities
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                var row = transpose.GetEntriesByRow(row_id);
                float weight = other_entity_weights[row_id];
                for (int i = 0; i < row.Count; i++)
                {
                    int x = row[i];
                    entity_weights[x] += weight;
                    for (int j = i + 1; j < row.Count; j++)
                    {
                        int y = row[j];
                        weighted_overlap[x, y] += weight * weight;
                    }
                }
            }

            // the diagonal of the correlation matrix
            for (int i = 0; i < num_entities; i++)
                this[i, i] = 1;

            // compute cosine
            for (int x = 0; x < num_entities; x++)
                for (int y = 0; y < x; y++)
                {
                    float weight_product = entity_weights[x] * entity_weights[y];
                    // an entity without entries has weight 0; skip the pair to avoid 0 / 0 = NaN
                    if (weight_product > 0)
                        this[x, y] = (float) (weighted_overlap[x, y] / Math.Sqrt(weight_product));
                }
        }
        /// <summary>Counts the entries of this matrix that are also set in another boolean matrix</summary>
        /// <param name="s">the matrix to compare with</param>
        /// <returns>the number of (row, column) entries stored in row_list for which s[row, column] is true</returns>
        public int Overlap(IBooleanMatrix s)
        {
            int num_shared = 0;

            for (int row_id = 0; row_id < row_list.Count; row_id++)
            {
                var row = row_list[row_id];
                foreach (int column_id in row)
                {
                    if (!s[row_id, column_id])
                        continue;
                    num_shared++;
                }
            }

            return num_shared;
        }
예제 #40
0
        /// <summary>Computes the cosine between the rows of a binary matrix, counting overlaps in ushort cells</summary>
        /// <remarks>
        /// The overlap of two rows is the number of columns they share; it is divided by the square root of the
        /// product of the two row sizes. Pairs in which one row is empty are left untouched.
        /// </remarks>
        /// <param name="entity_data">the binary matrix, rows contain the entities to correlate</param>
        void ComputeCorrelationsUShortOverlap(IBooleanMatrix entity_data)
        {
            // direct cast: fail right here with an InvalidCastException instead of a later NullReferenceException
            var transpose = (IBooleanMatrix) entity_data.Transpose();

            var overlap = new SymmetricMatrix<ushort>(entity_data.NumberOfRows);

            // go over all (other) entities
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                var row = transpose.GetEntriesByRow(row_id);
                for (int i = 0; i < row.Count; i++)
                {
                    int x = row[i];
                    for (int j = i + 1; j < row.Count; j++)
                        overlap[x, row[j]]++;
                }
            }

            // the diagonal of the correlation matrix
            for (int i = 0; i < num_entities; i++)
                this[i, i] = 1;

            // compute cosine
            for (int x = 0; x < num_entities; x++)
            {
                // widen to long BEFORE multiplying: an int * int product overflows for large rows
                long size_x = entity_data.NumEntriesByRow(x);
                for (int y = 0; y < x; y++)
                {
                    long size_product = size_x * entity_data.NumEntriesByRow(y);
                    if (size_product > 0)
                        this[x, y] = (float) (overlap[x, y] / Math.Sqrt(size_product));
                }
            }
        }
예제 #41
0
		/// <summary>Recomputes row u of W with H held fixed</summary>
		/// <remarks>
		/// Solves (HH + HC_minus_IH + regularization * I) * w = HCp, where HC_minus_IH and HCp are built from
		/// the rows of H listed in row u of data, and stores the solution (cast to float) in W[u, .].
		/// </remarks>
		/// <param name="u">index of the row of W to recompute</param>
		/// <param name="data">boolean matrix whose row u lists the rows of H that are observed for u</param>
		/// <param name="W">factor matrix; only row u is written</param>
		/// <param name="H">factor matrix that is held fixed; it is only read</param>
		/// <param name="HH">matrix added to HC_minus_IH; presumably the precomputed H^T H (TODO confirm against caller)</param>
		private void Optimize(int u, IBooleanMatrix data, Matrix<float> W, Matrix<float> H, Matrix<double> HH)
		{
			var observed = data.GetEntriesByRow(u);
			int num_observed = observed.Count;

			// HCp in O(f|S_u|): column sums over the observed rows of H, scaled by (1 + alpha)
			var HCp = new double[num_factors];
			for (int f = 0; f < num_factors; f++)
			{
				double column_sum = 0;
				for (int k = 0; k < num_observed; k++)
					column_sum += H[observed[k], f];
				HCp[f] = column_sum * (1 + alpha);
			}

			// HC_minus_IH in O(f^2|S_u|); symmetric, so only the upper triangle is computed and then mirrored
			var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
			for (int f_1 = 0; f_1 < num_factors; f_1++)
				for (int f_2 = f_1; f_2 < num_factors; f_2++)
				{
					double dot = 0;
					for (int k = 0; k < num_observed; k++)
					{
						int i = observed[k];
						dot += H[i, f_1] * H[i, f_2];
					}
					double scaled = dot * alpha;
					HC_minus_IH[f_1, f_2] = scaled;
					HC_minus_IH[f_2, f_1] = scaled;
				}

			// m = HH + HC_minus_IH + reg*I, again symmetric (and so is its inverse)
			var m = new DenseMatrix(num_factors, num_factors);
			for (int f_1 = 0; f_1 < num_factors; f_1++)
				for (int f_2 = f_1; f_2 < num_factors; f_2++)
				{
					double entry = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
					if (f_1 == f_2)
						entry += regularization;
					m[f_1, f_2] = entry;
					m[f_2, f_1] = entry;
				}
			var m_inv = m.Inverse();

			// write back optimal W: W[u, .] = m_inv * HCp
			for (int f = 0; f < num_factors; f++)
			{
				double solution = 0;
				for (int k = 0; k < num_factors; k++)
					solution += m_inv[f, k] * HCp[k];
				W[u, f] = (float) solution;
			}
		}
예제 #42
0
		/// <summary>Describe user and item attribute matrices in one line of text each</summary>
		/// <param name="user_attributes">the user attributes; may be null, in which case no user line is produced</param>
		/// <param name="item_attributes">the item attributes; may be null, in which case no item line is produced</param>
		/// <returns>the concatenated description lines, or the empty string if both arguments are null</returns>
		public static string Statistics(IBooleanMatrix user_attributes, IBooleanMatrix item_attributes)
		{
			string user_line = user_attributes == null
				? string.Empty
				: string.Format(
					"{0} user attributes for {1} users, {2} assignments, {3} users with attribute assignments\n",
					user_attributes.NumberOfColumns, user_attributes.NumberOfRows,
					user_attributes.NumberOfEntries, user_attributes.NonEmptyRowIDs.Count);

			string item_line = item_attributes == null
				? string.Empty
				: string.Format(
					"{0} item attributes for {1} items, {2} assignments, {3} items with attribute assignments\n",
					item_attributes.NonEmptyColumnIDs.Count, item_attributes.NumberOfRows,
					item_attributes.NumberOfEntries, item_attributes.NonEmptyRowIDs.Count);

			return user_line + item_line;
		}
예제 #43
0
        /// <summary>Computes the cosine between the rows of a binary matrix, using a sparse overlap matrix</summary>
        /// <remarks>
        /// The overlap of two rows is the number of columns they share; it is divided by the square root of the
        /// product of the two row sizes. Only pairs with a non-empty overlap entry are written.
        /// </remarks>
        /// <param name="entity_data">the binary matrix, rows contain the entities to correlate</param>
        public override void ComputeCorrelations(IBooleanMatrix entity_data)
        {
            var transpose = (IBooleanMatrix) entity_data.Transpose();

            var overlap = new SparseMatrix<int>(entity_data.NumberOfRows, entity_data.NumberOfRows);

            // go over all (other) entities
            for (int row_id = 0; row_id < transpose.NumberOfRows; row_id++)
            {
                var row = transpose.GetEntriesByRow(row_id);

                for (int i = 0; i < row.Count; i++)
                {
                    int x = row[i];

                    for (int j = i + 1; j < row.Count; j++)
                    {
                        int y = row[j];

                        // keep each pair in a single cell: smaller index first
                        if (x < y)
                            overlap[x, y]++;
                        else
                            overlap[y, x]++;
                    }
                }
            }

            // the diagonal of the correlation matrix
            for (int i = 0; i < num_entities; i++)
                this[i, i] = 1;

            // compute cosine
            foreach (var index_pair in overlap.NonEmptyEntryIDs)
            {
                int x = index_pair.First;
                int y = index_pair.Second;

                // widen to long BEFORE multiplying: an int * int product can overflow to a negative value,
                // for which Math.Sqrt returns NaN
                long size_product = (long) entity_data.NumEntriesByRow(x) * entity_data.NumEntriesByRow(y);

                this[x, y] = (float) (overlap[x, y] / Math.Sqrt(size_product));
            }
        }
예제 #44
0
		/// <summary>Describe an item recommendation dataset: sizes and sparsity of training and test data, plus attribute statistics</summary>
		/// <param name="training_data">the training dataset</param>
		/// <param name="test_data">the test dataset; if null, no test data line is produced</param>
		/// <param name="user_attributes">the user attributes, passed on to the attribute statistics</param>
		/// <param name="item_attributes">the item attributes, passed on to the attribute statistics</param>
		/// <returns>the training line, the optional test line and the attribute statistics, concatenated</returns>
		public static string Statistics(
			this IPosOnlyFeedback training_data, IPosOnlyFeedback test_data = null,
			IBooleanMatrix user_attributes = null, IBooleanMatrix item_attributes = null)
		{
			string report = FormatFeedbackStatistics(
				"training data: {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n", training_data);

			// TODO depends on the eval scheme whether the test sparsity is correct
			if (test_data != null)
				report += FormatFeedbackStatistics(
					"test data:     {0} users, {1} items, {2} events, sparsity {3,0:0.#####}\n", test_data);

			return report + Statistics(user_attributes, item_attributes);
		}

		/// <summary>Format the number of users, items and events of a dataset together with its sparsity</summary>
		/// <param name="format">format string with the placeholders {0} users, {1} items, {2} events, {3} sparsity</param>
		/// <param name="feedback">the dataset to describe</param>
		/// <returns>the formatted line (invariant culture)</returns>
		static string FormatFeedbackStatistics(string format, IPosOnlyFeedback feedback)
		{
			int user_count = feedback.AllUsers.Count;
			int item_count = feedback.AllItems.Count;

			// sparsity = percentage of the user x item cells without an event
			long cell_count  = (long) user_count * item_count;
			long empty_count = (long) cell_count - feedback.Count;
			double sparsity  = (double) 100L * empty_count / cell_count;

			return string.Format(CultureInfo.InvariantCulture, format, user_count, item_count, feedback.Count, sparsity);
		}
예제 #45
0
파일: WRMF.cs 프로젝트: bemde/MyMediaLite
        /// <summary>Optimizes the specified data</summary>
        /// <remarks>
        /// For every row u of W, solves (HH + HC_minus_IH + regularization * I) * w = HCp with H held fixed
        /// and writes the solution (cast to float) to W[u, .]. HH, HC_minus_IH and m are symmetric, so only the
        /// upper triangle of each is computed; the lower triangle is filled by mirroring.
        /// </remarks>
        /// <param name="data">boolean matrix whose row u lists the rows of H that are observed for row u of W</param>
        /// <param name="W">factor matrix to recompute; every row is overwritten</param>
        /// <param name="H">factor matrix that is held fixed; it is only read</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix<float> W, Matrix<float> H)
        {
            var HH          = new Matrix<double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new DenseMatrix(num_factors, num_factors);

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric: compute the upper triangle and mirror it
            for (int f_1 = 0; f_1 < num_factors; f_1++)
                for (int f_2 = f_1; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                        d += H[i, f_1] * H[i, f_2];
                    HH[f_1, f_2] = d;
                    HH[f_2, f_1] = d;
                }
            // (2) optimize all U
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);
                // create HC_minus_IH in O(f^2|S_u|)
                // HC_minus_IH is symmetric: compute the upper triangle and mirror it
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                            d += H[i, f_1] * H[i, f_2] * c_pos;
                        HC_minus_IH[f_1, f_2] = d;
                        HC_minus_IH[f_2, f_1] = d;
                    }
                // create HCp in O(f|S_u|)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    foreach (int i in row)
                        d += H[i, f] * (1 + c_pos);
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric; every entry is rewritten for each u, so the matrix can be reused
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                            d += regularization;
                        m[f_1, f_2] = d;
                        m[f_2, f_1] = d;
                    }
                // the inverse m_inv is symmetric
                var m_inv = m.Inverse();
                // write back optimal W: W[u, .] = m_inv * HCp
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                        d += m_inv[f, f_2] * HCp[f_2];
                    W[u, f] = (float) d;
                }
            }
        }
예제 #46
0
        /// <summary>Optimizes the specified data</summary>
        /// <remarks>
        /// For every row u of W, solves (HH + HC_minus_IH + Regularization * I) * w = HCp with H held fixed.
        /// Unlike the plain variant, HCp also receives a contribution from every row of inverse_data that is
        /// NOT in row u of data, weighted by that row's entry count times a per-u normalization factor.
        /// </remarks>
        /// <param name="data">boolean matrix whose row u lists the rows of H that are observed for row u of W</param>
        /// <param name="inverse_data">boolean matrix whose per-row entry counts are used as weights; presumably the transpose of data (TODO confirm)</param>
        /// <param name="W">factor matrix to recompute; every row is overwritten</param>
        /// <param name="H">factor matrix that is held fixed; it is only read</param>
        void Optimize(IBooleanMatrix data, IBooleanMatrix inverse_data, Matrix <double> W, Matrix <double> H)
        {
            var HH          = new Matrix <double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix <double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);

            MathNet.Numerics.LinearAlgebra.Matrix m_inv;
            // TODO speed up using more parts of that library

            // TODO using properties gives a 3-5% performance penalty

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // the entry counts of inverse_data depend neither on u nor on f: read them once
            int num_inverse_rows  = inverse_data.NumberOfRows;
            var inverse_row_sizes = new int[num_inverse_rows];
            for (int i = 0; i < num_inverse_rows; i++)
            {
                inverse_row_sizes[i] = inverse_data.NumEntriesByRow(i);
            }
            // membership of each index in the current row; refilled completely for every u
            var is_in_row = new bool[num_inverse_rows];

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric
            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = 0; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                    {
                        d += H[i, f_1] * H[i, f_2];
                    }
                    HH[f_1, f_2] = d;
                }
            }
            // (2) optimize all U
            // HC_minus_IH is symmetric
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);

                // prepare KDD Cup specific weighting
                int num_user_items           = row.Count;
                int user_positive_weight_sum = 0;
                foreach (int i in row)
                {
                    user_positive_weight_sum += inverse_data.NumEntriesByRow(i);
                }
                // NOTE(review): the divisor is 0 when Feedback.Count equals user_positive_weight_sum -- confirm this cannot happen
                double neg_weight_normalization = (double)(num_user_items * (1 + CPos)) / (Feedback.Count - user_positive_weight_sum);
                // TODO precompute
                // TODO check whether this is correct

                // the membership test does not depend on f: evaluate it once per index instead of once per (index, factor)
                for (int i = 0; i < num_inverse_rows; i++)
                {
                    is_in_row[i] = row.Contains(i);
                }

                // create HC_minus_IH in O(f^2|S_u|)
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                        {
                            d += H[i, f_1] * H[i, f_2] * CPos;
                        }
                        HC_minus_IH[f_1, f_2] = d;
                    }
                }
                // create HCp in O(f|Items|)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int i = 0; i < num_inverse_rows; i++)
                    {
                        if (is_in_row[i])
                        {
                            d += H[i, f] * (1 + CPos);
                        }
                        else
                        {
                            d += H[i, f] * inverse_row_sizes[i] * neg_weight_normalization;
                        }
                    }
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric
                // the inverse m_inv is symmetric
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                        {
                            d += Regularization;
                        }
                        m[f_1, f_2] = d;
                    }
                }
                m_inv = m.Inverse();
                // write back optimal W: W[u, .] = m_inv * HCp
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        d += m_inv[f, f_2] * HCp[f_2];
                    }
                    W[u, f] = d;
                }
            }
        }
예제 #47
0
        /// <summary>Evaluation for rankings of items recommended to groups</summary>
        /// <remarks>
        /// For every group with at least one user, the test items of its users (restricted to the candidate items)
        /// are the correct items. Groups without correct items, or for which every evaluated item is correct, are skipped.
        /// The per-group measures are summed up in the result.
        /// NOTE(review): the sums are not divided by the number of groups in this method -- confirm where the averaging happens.
        /// </remarks>
        /// <param name="recommender">group recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="group_to_user">group to user relation</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="ignore_overlap">if true, ignore items that appear for a group in the training set when evaluating for that group</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        public static ItemRecommendationEvaluationResults Evaluate(
			this GroupRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
			IBooleanMatrix group_to_user,
			ICollection<int> candidate_items,
			bool ignore_overlap = true)
        {
            var result = new ItemRecommendationEvaluationResults();
            int num_groups = 0;

            foreach (int group_id in group_to_user.NonEmptyRowIDs)
            {
                var members = group_to_user.GetEntriesByRow(group_id);

                // candidate items that any group member has in the test data
                var correct_items = new HashSet<int>();
                foreach (int member_id in members)
                    correct_items.UnionWith(test.UserMatrix[member_id]);
                correct_items.IntersectWith(candidate_items);

                // candidate items that any group member has in the training data
                var train_items = new HashSet<int>();
                foreach (int member_id in members)
                    train_items.UnionWith(train.UserMatrix[member_id]);
                train_items.IntersectWith(candidate_items);

                int num_ignored = ignore_overlap ? train_items.Count : 0;
                int num_eval_items = candidate_items.Count - num_ignored;

                // skip all groups that have 0 or #candidate_items test items
                if (correct_items.Count == 0 || num_eval_items == correct_items.Count)
                    continue;

                IList<int> ranking = recommender.RankItems(members, candidate_items);
                if (ranking.Count != candidate_items.Count)
                    throw new Exception("Not all items have been ranked.");

                HashSet<int> ignore_items;
                if (ignore_overlap)
                    ignore_items = train_items;
                else
                    ignore_items = new HashSet<int>();

                var positions = new int[] { 5, 10 };

                double auc  = AUC.Compute(ranking, correct_items, ignore_items);
                double map  = PrecisionAndRecall.AP(ranking, correct_items, ignore_items);
                double ndcg = NDCG.Compute(ranking, correct_items, ignore_items);
                double rr   = ReciprocalRank.Compute(ranking, correct_items, ignore_items);
                var prec    = PrecisionAndRecall.PrecisionAt(ranking, correct_items, ignore_items, positions);
                var recall  = PrecisionAndRecall.RecallAt(ranking, correct_items, ignore_items, positions);

                // thread-safe incrementing
                lock(result)
                {
                    num_groups++;
                    result["AUC"]       += (float) auc;
                    result["MAP"]       += (float) map;
                    result["NDCG"]      += (float) ndcg;
                    result["MRR"]       += (float) rr;
                    result["prec@5"]    += (float) prec[5];
                    result["prec@10"]   += (float) prec[10];
                    result["recall@5"]  += (float) recall[5];
                    result["recall@10"] += (float) recall[10];
                }

                // progress output: one dot per 1000 evaluated groups, a line break every 60000
                if (num_groups % 1000 == 0)
                    Console.Error.Write(".");
                if (num_groups % 60000 == 0)
                    Console.Error.WriteLine();
            }

            result["num_groups"] = num_groups;
            result["num_lists"]  = num_groups;
            result["num_items"]  = candidate_items.Count;

            return result;
        }